//
// FileSystemQueryable.cs
//
// Copyright (C) 2004 Novell, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;
using System.Reflection;
using System.Text;
using System.Threading;

using Beagle.Daemon;
using Beagle.Util;

namespace Beagle.Daemon.FileSystemQueryable {

	[QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
	public class FileSystemQueryable : LuceneQueryable {

		static public bool Debug = false;

		private const string SplitFilenamePropKey = "beagle:SplitFilename";
		public const string ExactFilenamePropKey = "beagle:ExactFilename";
		public const string TextFilenamePropKey = "beagle:Filename";
		public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
		public const string FilenameExtensionPropKey = "beagle:FilenameExtension";
		public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
		public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";

		// History:
		// 1: Initially set to force a reindex due to NameIndex changes.
		// 2: Overhauled everything to use new lucene infrastructure.
		// 3: Switched to UTC for all times, changed the properties a bit.
		// 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
		//    Make SplitFilenamePropKey unstored.
		const int MINOR_VERSION = 4;

		private object big_lock = new object ();

		private IFileEventBackend event_backend;

		// This is the task that walks the tree structure
		private TreeCrawlTask tree_crawl_task;

		// This is the task that finds the next place that
		// needs to be crawled in the tree and spawns off
		// the appropriate IndexableGenerator.
		private FileCrawlTask file_crawl_task;

		private ArrayList roots = new ArrayList ();
		private ArrayList roots_by_path = new ArrayList ();

		private FileNameFilter filter;

		// This is just a copy of the LuceneQueryable's QueryingDriver
		// cast into the right type for doing internal->external Uri
		// lookups.
		private LuceneNameResolver name_resolver;

		//////////////////////////////////////////////////////////////////////////

		private Hashtable cached_uid_by_path = new Hashtable ();

		//////////////////////////////////////////////////////////////////////////

		public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
		{
			// Set up our event backend
			if (Inotify.Enabled) {
				Logger.Log.Debug ("Starting Inotify Backend");
				event_backend = new InotifyBackend ();
			} else {
				Logger.Log.Debug ("Creating null file event backend");
				event_backend = new NullFileEventBackend ();
			}

			tree_crawl_task = new TreeCrawlTask (new TreeCrawlTask.Handler (AddDirectory));
			tree_crawl_task.Source = this;

			file_crawl_task = new FileCrawlTask (this);
			file_crawl_task.Source = this;

			name_resolver = (LuceneNameResolver) Driver;
			PreloadDirectoryNameInfo ();

			// Setup our file-name filter
			filter = new FileNameFilter (this);

			// Do the right thing when paths expire
			DirectoryModel.ExpireEvent +=
				new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
		}
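
		// LuceneQueryable hooks: plug in our "mixed" file attributes store
		// (extended attributes, falling back to sqlite when we can't write
		// xattrs) and the name-resolving Lucene driver that this backend
		// uses for internal->external Uri lookups.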
		override protected IFileAttributesStore BuildFileAttributesStore ()
		{
			return new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
		}

		override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
										   int    minor_version,
										   bool   read_only_mode)
		{
			return new LuceneNameResolver (index_name, minor_version, read_only_mode);
		}

		public FileNameFilter Filter {
			get { return filter; }
		}

		//////////////////////////////////////////////////////////////////////////

		// This is where we build our Indexables
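
		// Every file and directory gets the same set of name-related
		// properties: the exact filename as a keyword, the extension-less
		// name as searchable text (with and without punctuation), the
		// extension, a fuzzily-split version of the name, and the uri of
		// the parent directory.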
		public static void AddStandardPropertiesToIndexable (Indexable indexable,
								     string    name,
								     Guid      parent_id,
								     bool      mutable)
		{
			StringBuilder sb;
			sb = new StringBuilder ();

			string no_ext, ext, no_punct;
			no_ext = Path.GetFileNameWithoutExtension (name);
			ext = Path.GetExtension (name).ToLower ();

			sb.Append (no_ext);
			for (int i = 0; i < sb.Length; ++i)
				if (! Char.IsLetterOrDigit (sb [i]))
					sb [i] = ' ';
			no_punct = sb.ToString ();

			Property prop;

			prop = Property.NewKeyword (ExactFilenamePropKey, name);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.New (TextFilenamePropKey, no_ext);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.New (NoPunctFilenamePropKey, no_punct);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.NewUnsearched (FilenameExtensionPropKey, ext);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			string str;
			str = StringFu.FuzzyDivide (no_ext);
			prop = Property.NewUnstored (SplitFilenamePropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			if (parent_id == Guid.Empty)
				return;

			str = GuidFu.ToUriString (parent_id);
			// We use the uri here to recycle terms in the index,
			// since each directory's uri will already be indexed.
			prop = Property.NewUnsearched (ParentDirUriPropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);
		}

		public static void AddStandardPropertiesToIndexable (Indexable      indexable,
								     string         name,
								     DirectoryModel parent,
								     bool           mutable)
		{
			AddStandardPropertiesToIndexable (indexable,
							  name,
							  parent == null ? Guid.Empty : parent.UniqueId,
							  mutable);

			indexable.LocalState ["Parent"] = parent;
		}
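
		// Build an indexable for a directory.  Directories carry no
		// content; we only index their name properties and mark them
		// with the IsDirectory property.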
		public static Indexable DirectoryToIndexable (string         path,
							      Guid           id,
							      DirectoryModel parent)
		{
			Indexable indexable;
			indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
			indexable.MimeType = "inode/directory";
			indexable.NoContent = true;
			indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);

			string name;
			if (parent == null)
				name = path;
			else
				name = Path.GetFileName (path);

			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			Property prop;
			prop = Property.NewBool (IsDirectoryPropKey, true);
			prop.IsMutable = true; // we want this in the secondary index, for efficiency
			indexable.AddProperty (prop);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}
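
		// Build an indexable for a regular file.  The content is pulled
		// from the file itself and always run through filtering;
		// crawl_mode distinguishes crawler-generated adds from ones
		// triggered by file events.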
		public static Indexable FileToIndexable (string         path,
							 Guid           id,
							 DirectoryModel parent,
							 bool           crawl_mode)
		{
			Indexable indexable;
			indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
			indexable.Timestamp = File.GetLastWriteTimeUtc (path);
			indexable.ContentUri = UriFu.PathToFileUri (path);
			indexable.Crawled = crawl_mode;
			indexable.Filtering = Beagle.IndexableFiltering.Always;

			AddStandardPropertiesToIndexable (indexable, Path.GetFileName (path), parent, true);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}
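
		// Build a property-change indexable that renames an existing
		// index record in place, keyed by its unique id.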
		private static Indexable NewRenamingIndexable (string         name,
							       Guid           id,
							       DirectoryModel parent,
							       string         last_known_path)
		{
			Indexable indexable;
			indexable = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			indexable.LocalState ["Id"] = id;
			indexable.LocalState ["LastKnownPath"] = last_known_path;

			return indexable;
		}

		//////////////////////////////////////////////////////////////////////////

		// Mapping from directory ids to paths

		private Hashtable dir_models_by_id = new Hashtable ();
		private Hashtable name_info_by_id = new Hashtable ();

		// We fall back to using the name information in the index
		// until we've fully constructed our set of DirectoryModels.
		private void PreloadDirectoryNameInfo ()
		{
			ICollection all;
			all = name_resolver.GetAllDirectoryNameInfo ();
			foreach (LuceneNameResolver.NameInfo info in all)
				name_info_by_id [info.Id] = info;
		}

		// This only works for directories.
		private string UniqueIdToDirectoryName (Guid id)
		{
			DirectoryModel dir;
			dir = dir_models_by_id [id] as DirectoryModel;
			if (dir != null)
				return dir.FullName;

			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				if (info.ParentId == Guid.Empty) { // i.e. this is a root
					return info.Name;
				} else {
					string parent_name;
					parent_name = UniqueIdToDirectoryName (info.ParentId);
					if (parent_name == null)
						return null;
					return Path.Combine (parent_name, info.Name);
				}
			}

			return null;
		}

		private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
		{
			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				info.ParentId = new_parent_id;
				info.Name = new_name;
			}
		}

		private string ToFullPath (string name, Guid parent_id)
		{
			// This is the correct behavior for roots.
			if (parent_id == Guid.Empty)
				return name;

			string parent_name;
			parent_name = UniqueIdToDirectoryName (parent_id);
			if (parent_name == null)
				return null;

			return Path.Combine (parent_name, name);
		}

		// This works for both files and directories.
		private string UniqueIdToFullPath (Guid id)
		{
			// First, check if it is a directory.
			string path;
			path = UniqueIdToDirectoryName (id);
			if (path != null)
				return path;

			// If not, try to pull name information out of the index.
			LuceneNameResolver.NameInfo info;
			info = name_resolver.GetNameInfoById (id);
			if (info == null)
				return null;
			return ToFullPath (info.Name, info.ParentId);
		}

		private void RegisterId (string name, DirectoryModel dir, Guid id)
		{
			cached_uid_by_path [Path.Combine (dir.FullName, name)] = id;
		}

		private void ForgetId (string path)
		{
			cached_uid_by_path.Remove (path);
		}

		// This works for files.  (It probably works for directories
		// too, but you should use one of the more efficient means
		// above if you know it is a directory.)
		private Guid NameAndParentToId (string name, DirectoryModel dir)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			Guid unique_id;
			if (cached_uid_by_path.Contains (path))
				unique_id = (Guid) cached_uid_by_path [path];
			else
				unique_id = name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

			return unique_id;
		}

		//////////////////////////////////////////////////////////////////////////

		// Directory-related methods

		private Hashtable dir_models_by_path = new Hashtable ();
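
		// Look up the DirectoryModel for a path, first in our cache and
		// then by walking the tree under each root.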
		private DirectoryModel GetDirectoryModelByPath (string path)
		{
			DirectoryModel dir;

			lock (dir_models_by_path) {
				dir = dir_models_by_path [path] as DirectoryModel;
				if (dir != null)
					return dir;
			}

			// Walk each root until we find the correct path
			foreach (DirectoryModel root in roots) {
				dir = root.WalkTree (path);
				if (dir != null) {
					lock (dir_models_by_path)
						dir_models_by_path [path] = dir;
					break;
				}
			}

			return dir;
		}

		private void ExpireDirectoryPath (string expired_path, Guid unique_id)
		{
			if (Debug)
				Logger.Log.Debug ("Expired '{0}'", expired_path);

			lock (dir_models_by_path)
				dir_models_by_path.Remove (expired_path);
		}
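
		// Called both for roots (parent == null) and for subdirectories
		// discovered by the tree crawler or reported by the event backend.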
		public void AddDirectory (DirectoryModel parent, string name)
		{
			// Ignore the stuff we want to ignore.
			if (filter.Ignore (parent, name, true))
				return;

			if (parent != null && parent.HasChildWithName (name))
				return;

			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Adding directory '{0}'", path);

			if (! Directory.Exists (path)) {
				Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
				return;
			}

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			// Note that we don't look at the mtime of a directory when
			// deciding whether or not to index it.
			bool needs_indexing = false;
			if (attr == null) {
				// If it has no attributes, it definitely needs
				// indexing.
				needs_indexing = true;
			} else {
				// Make sure that it still has the same name as before.
				// If not, we need to re-index it.
				// We can do this since we preloaded all of the name
				// info in the directory via PreloadDirectoryNameInfo.
				string last_known_name;
				last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
				if (last_known_name != path) {
					Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
					needs_indexing = true;
				}
			}

			// If we can't descend into this directory, we want to
			// index it but not build a DirectoryModel for it.
			// FIXME: We should do the right thing when a
			// directory's permissions change.
			bool is_walkable;
			is_walkable = DirectoryWalker.IsWalkable (path);
			if (! is_walkable)
				Logger.Log.Debug ("Can't walk '{0}'", path);

			if (needs_indexing)
				ScheduleDirectory (name, parent, attr, is_walkable);
			else if (is_walkable)
				RegisterDirectory (name, parent, attr);
		}

		public void AddRoot (string path)
		{
			path = StringFu.SanitizePath (path);
			Logger.Log.Debug ("Adding root: {0}", path);

			if (roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to add an existing root: {0}", path);
				return;
			}

			// We need to have the path in roots_by_path for the filtering
			// to work as we'd like before the root is actually added.
			roots_by_path.Add (path);

			AddDirectory (null, path);
		}

		public void RemoveRoot (string path)
		{
			Logger.Log.Debug ("Removing root: {0}", path);

			if (! roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
				return;
			}

			// Find our directory model for the root
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			if (dir == null) {
				Logger.Log.Error ("Could not find directory-model for root: {0}", path);
				return;
			}

			// FIXME: Make sure we're emptying the crawler task of any sub-directories
			// of the root we're removing.  It's not a big deal since we do an Ignore-check
			// in there, but it would be nice.

			roots_by_path.Remove (path);
			roots.Remove (dir);

			// Clean out the root from our directory cache.
			RemoveDirectory (dir);
		}
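
		// Queue a directory for indexing.  The directory's name, last
		// crawl time and walkability are stashed in LocalState so that
		// PostAddHook can register a DirectoryModel for it once the add
		// has actually been processed.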
		private void ScheduleDirectory (string         name,
						DirectoryModel parent,
						FileAttributes attr,
						bool           is_walkable)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Guid id;
			id = (attr == null) ? Guid.NewGuid () : attr.UniqueId;

			DateTime last_crawl;
			last_crawl = (attr == null) ? DateTime.MinValue : attr.LastWriteTime;

			Indexable indexable;
			indexable = DirectoryToIndexable (path, id, parent);

			indexable.LocalState ["Name"] = name;
			indexable.LocalState ["LastCrawl"] = last_crawl;
			indexable.LocalState ["IsWalkable"] = is_walkable;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Delayed;
			ThisScheduler.Add (task);
		}

		private void RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

			DirectoryModel dir;
			if (parent == null)
				dir = DirectoryModel.NewRoot (big_lock, path, attr);
			else
				dir = parent.AddChild (name, attr);

			if (Directory.GetLastWriteTimeUtc (path) > attr.LastWriteTime) {
				dir.State = DirectoryState.Dirty;
				if (Debug)
					Logger.Log.Debug ("'{0}' is dirty", path);
			}

			if (Debug) {
				if (dir.IsRoot)
					Logger.Log.Debug ("Created model '{0}'", dir.FullName);
				else
					Logger.Log.Debug ("Created model '{0}' with parent '{1}'", dir.FullName, dir.Parent.FullName);
			}

			// Add any roots we create to the list of roots
			if (dir.IsRoot)
				roots.Add (dir);

			// Add the directory to our by-id hash, and remove any NameInfo
			// we might have cached about it.
			dir_models_by_id [dir.UniqueId] = dir;
			name_info_by_id.Remove (dir.UniqueId);

			// Start watching the directory.
			dir.WatchHandle = event_backend.CreateWatch (path);

			// Schedule this directory for crawling.
			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			// Make sure that our file crawling task is active,
			// since presumably we now have something new to crawl.
			ActivateFileCrawling ();
		}

		private void ForgetDirectoryRecursively (DirectoryModel dir)
		{
			foreach (DirectoryModel child in dir.Children)
				ForgetDirectoryRecursively (child);

			if (dir.WatchHandle != null)
				event_backend.ForgetWatch (dir.WatchHandle);
			dir_models_by_id.Remove (dir.UniqueId);
			// We rely on the expire event to remove it from dir_models_by_path
		}

		private void RemoveDirectory (DirectoryModel dir)
		{
			Uri uri;
			uri = GuidFu.ToUri (dir.UniqueId);

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);

			// Remember a copy of our external Uri, so that we can
			// easily remap it in the PostRemoveHook.
			indexable.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

			// Forget watches and internal references
			ForgetDirectoryRecursively (dir);

			// Calling Remove will expire the path names,
			// so name caches will be cleaned up accordingly.
			dir.Remove ();

			Scheduler.Task task;
			task = NewAddTask (indexable); // We *add* the indexable to *remove* the index item
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveDirectory (string path)
		{
			DirectoryModel dir = GetDirectoryModelByPath (path);
			if (dir != null)
				RemoveDirectory (dir);
		}
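
		// Handle a directory move/rename.  We update the in-memory model
		// and schedule a property-change indexable; child records do not
		// need to be re-indexed because they point at the parent's uri,
		// not its path.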
		private void MoveDirectory (DirectoryModel dir,
					    DirectoryModel new_parent, // or null if we are just renaming
					    string         new_name)
		{
			if (dir == null) {
				Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
						 new_name, new_parent.FullName);
				AddDirectory (new_parent, new_name);
				return;
			}

			if (dir.IsRoot)
				throw new Exception ("Can't move root " + dir.FullName);

			// We'll need this later in order to generate the
			// right change notification.
			string old_path;
			old_path = dir.FullName;

			if (new_parent != null && new_parent != dir.Parent)
				dir.MoveTo (new_parent, new_name);
			else
				dir.Name = new_name;

			// Remember this by path
			lock (dir_models_by_path)
				dir_models_by_path [dir.FullName] = dir;

			CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  dir.UniqueId,
							  dir.Parent, // == new_parent
							  old_path);
			indexable.LocalState ["OurDirectoryModel"] = dir;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now that type no longer exists!
			ThisScheduler.Add (task);
		}

		//////////////////////////////////////////////////////////////////////////

		// This code controls the directory crawl order

		private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
		{
			if (contender.NeedsCrawl) {
				if (prev_best == null || prev_best.CompareTo (contender) < 0)
					prev_best = contender;
			}

			foreach (DirectoryModel child in contender.Children)
				prev_best = StupidWalk (prev_best, child);

			return prev_best;
		}

		public DirectoryModel GetNextDirectoryToCrawl ()
		{
			DirectoryModel next_dir = null;

			foreach (DirectoryModel root in roots)
				next_dir = StupidWalk (next_dir, root);

			return next_dir;
		}

		public void DoneCrawlingOneDirectory (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			FileAttributes attr;
			attr = FileAttributesStore.Read (dir.FullName);

			// Don't mark ourselves; let the crawler redo us
			if (attr == null)
				return;

			// We don't have to be super-careful about this since
			// we only use the FileAttributes mtime on a directory
			// to determine its initial state, not whether or not
			// its index record is up-to-date.
			attr.LastWriteTime = DateTime.UtcNow;

			// ...but we do use this to decide which order directories get
			// crawled in.
			dir.LastCrawlTime = DateTime.UtcNow;

			FileAttributesStore.Write (attr);
			dir.MarkAsClean ();
		}

		public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			// If we managed to set up a watch on this directory,
			// drop it.
			if (dir.WatchHandle != null) {
				event_backend.ForgetWatch (dir.WatchHandle);
				dir.WatchHandle = null;
			}

			dir.MarkAsUncrawlable ();
		}

		public void Recrawl (string path)
		{
			// Try to find a directory model for the path specified
			// so that we can re-crawl it.
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			bool path_is_registered = true;

			if (dir == null) {
				dir = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));
				path_is_registered = false;

				if (dir == null) {
					Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
					return;
				}
			}

			Logger.Log.Debug ("Re-crawling {0}", dir.FullName);

			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			if (path_is_registered)
				Recrawl_Recursive (dir, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		public void RecrawlEverything ()
		{
			Logger.Log.Debug ("Re-crawling all directories");

			foreach (DirectoryModel root in roots)
				Recrawl_Recursive (root, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
		{
			dir.State = state;
			tree_crawl_task.Add (dir);
			foreach (DirectoryModel sub_dir in dir.Children)
				Recrawl_Recursive (sub_dir, state);
		}

		private void ActivateFileCrawling ()
		{
			if (! file_crawl_task.IsActive)
				ThisScheduler.Add (file_crawl_task);
		}

		private void ActivateDirectoryCrawling ()
		{
			if (! tree_crawl_task.IsActive)
				ThisScheduler.Add (tree_crawl_task);
		}

		//////////////////////////////////////////////////////////////////////////

		// File-related methods

		private enum RequiredAction {
			None,
			Index,
			Rename,
			Forget
		}

		static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

		static DateTime ToDateTimeUtc (long time_t)
		{
			return epoch.AddSeconds (time_t);
		}
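
		// Decide what to do with a file found by the crawler: index it,
		// rename its existing record, strip its stale attributes, or do
		// nothing.  The decision is based on the file's ignore status,
		// its stored attributes, the filter version, and its mtime/ctime.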
		private RequiredAction DetermineRequiredAction (DirectoryModel dir,
								string         name,
								FileAttributes attr,
								out string     last_known_path)
		{
			last_known_path = null;

			string path;
			path = Path.Combine (dir.FullName, name);

			if (Debug)
				Logger.Log.Debug ("*** What should we do with {0}?", path);

			if (filter.Ignore (dir, name, false)) {
				// If there are attributes on the file, we must have indexed
				// it previously.  Since we are ignoring it now, we should strip
				// any file attributes from it.
				if (attr != null) {
					if (Debug)
						Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
					return RequiredAction.Forget;
				}

				if (Debug)
					Logger.Log.Debug ("*** Do nothing: File is ignored");
				return RequiredAction.None;
			}

			if (attr == null) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: File has no attributes");
				return RequiredAction.Index;
			}

			// FIXME: This does not take into account that we might have a better matching filter to use now.
			// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
			if (attr.FilterName != null && attr.FilterVersion > 0) {
				int current_filter_version;
				current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

				if (current_filter_version > attr.FilterVersion) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
					return RequiredAction.Index;
				}
			}

			Mono.Unix.Native.Stat stat;
			try {
				Mono.Unix.Native.Syscall.stat (path, out stat);
			} catch (Exception ex) {
				Logger.Log.Debug ("Caught exception stat-ing {0}", path);
				Logger.Log.Debug (ex);
				return RequiredAction.None;
			}

			DateTime last_write_time, last_attr_time;
			last_write_time = ToDateTimeUtc (stat.st_mtime);
			last_attr_time = ToDateTimeUtc (stat.st_ctime);

			if (attr.LastWriteTime != last_write_time) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

				// If the file has been copied, it will have the
				// original file's EAs.  Thus we have to check to
				// make sure that the unique id in the EAs actually
				// belongs to this file.  If not, replace it with a new one.
				// (Thus touching & then immediately renaming a file can
				// cause its unique id to change, which is less than
				// optimal but probably can't be helped.)
				last_known_path = UniqueIdToFullPath (attr.UniqueId);
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
					attr.UniqueId = Guid.NewGuid ();
				}

				return RequiredAction.Index;
			}

			// If the inode ctime is newer than the last time we
			// set file attributes, we might have been moved.  We don't
			// strictly compare times due to the fact that although
			// setting xattrs changes the ctime, if we don't have write
			// access our metadata will be stored in sqlite, and the
			// ctime will be at some point in the past.
			if (attr.LastAttrTime < last_attr_time) {
				if (Debug)
					Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

				last_known_path = UniqueIdToFullPath (attr.UniqueId);

				if (last_known_path == null) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
					return RequiredAction.Index;
				}

				// If the name has changed but the mtime
				// hasn't, the only logical conclusion is that
				// the file has been renamed.
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Rename it: CTime and path have changed");
					return RequiredAction.Rename;
				}
			}

			// We don't have to do anything, which is always preferable.
			if (Debug)
				Logger.Log.Debug ("*** Do nothing");
			return RequiredAction.None;
		}

		// Return an indexable that will do the right thing with a file
		// (or null, if the right thing is to do nothing)
		public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			RequiredAction action;
			string last_known_path;
			action = DetermineRequiredAction (dir, name, attr, out last_known_path);

			if (action == RequiredAction.None)
				return null;

			Guid unique_id;
			if (attr != null)
				unique_id = attr.UniqueId;
			else
				unique_id = Guid.NewGuid ();

			Indexable indexable = null;

			switch (action) {

			case RequiredAction.Index:
				indexable = FileToIndexable (path, unique_id, dir, true);
				break;

			case RequiredAction.Rename:
				indexable = NewRenamingIndexable (name, unique_id, dir,
								  last_known_path);
				break;

			case RequiredAction.Forget:
				FileAttributesStore.Drop (path);
				break;
			}

			return indexable;
		}
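
		// Called when the event backend reports a new file.  If the file's
		// extended attributes already carry a unique id that matches the
		// index, we reuse it; otherwise it gets a fresh one (e.g. for a
		// copy of an already-indexed file).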
		public void AddFile (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			if (! File.Exists (path))
				return;

			if (filter.Ignore (dir, name, false))
				return;

			// If this file already has extended attributes,
			// make sure that the name matches the file
			// that is in the index.  If not, it could be
			// a copy of an already-indexed file and should
			// be assigned a new unique id.
			Guid unique_id = Guid.Empty;
			FileAttributes attr;
			attr = FileAttributesStore.Read (path);
			if (attr != null) {
				LuceneNameResolver.NameInfo info;
				info = name_resolver.GetNameInfoById (attr.UniqueId);
				if (info != null
				    && info.Name == name
				    && info.ParentId == dir.UniqueId)
					unique_id = attr.UniqueId;
			}

			if (unique_id == Guid.Empty)
				unique_id = Guid.NewGuid ();

			RegisterId (name, dir, unique_id);

			Indexable indexable;
			indexable = FileToIndexable (path, unique_id, dir, false);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveFile (DirectoryModel dir, string name)
		{
			// FIXME: We might as well remove it, even if it was being ignored.
			// Right?

			Guid unique_id;
			unique_id = NameAndParentToId (name, dir);
			if (unique_id == Guid.Empty) {
				Logger.Log.Warn ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
						 name, dir.FullName);
				return;
			}

			Uri uri, file_uri;
			uri = GuidFu.ToUri (unique_id);
			file_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);
			indexable.LocalState ["RemovedUri"] = file_uri;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}
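
		// Handle a file move/rename, synthesizing an add or remove when
		// the move crosses the boundary between ignored and indexed paths.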
		public void MoveFile (DirectoryModel old_dir, string old_name,
				      DirectoryModel new_dir, string new_name)
		{
			bool old_ignore, new_ignore;
			old_ignore = filter.Ignore (old_dir, old_name, false);
			new_ignore = filter.Ignore (new_dir, new_name, false);

			if (old_ignore && new_ignore)
				return;

			// If our ignore-state is changing, synthesize the appropriate
			// action.

			if (old_ignore && ! new_ignore) {
				AddFile (new_dir, new_name);
				return;
			}

			if (! old_ignore && new_ignore) {
				RemoveFile (new_dir, new_name);
				return;
			}

			// We need to find the file's unique id.
			// We can't look at the extended attributes w/o making
			// assumptions about whether they follow around the
			// file (EAs) or the path (sqlite)...
			Guid unique_id;
			unique_id = NameAndParentToId (old_name, old_dir);
			if (unique_id == Guid.Empty) {
				// If we can't find the unique ID, we have to
				// assume that the original file never made it
				// into the index --- thus we treat this as
				// an Add.
				AddFile (new_dir, new_name);
				return;
			}

			RegisterId (new_name, new_dir, unique_id);

			string old_path;
			old_path = Path.Combine (old_dir.FullName, old_name);

			ForgetId (old_path);

			// FIXME: I think we need to be more conservative when we see
			// events in a directory that has not been fully scanned, just to
			// avoid races.  i.e. what if we are in the middle of crawling that
			// directory and haven't reached this file yet?  Then the rename
			// will fail.
			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  unique_id,
							  new_dir,
							  old_path);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now AddType no longer exists
			ThisScheduler.Add (task);
		}

		//////////////////////////////////////////////////////////////////////////

		// Configuration stuff

		public IList Roots {
			get {
				return roots_by_path;
			}
		}

		private void LoadConfiguration ()
		{
			if (Conf.Indexing.IndexHomeDir)
				AddRoot (PathFinder.HomeDir);

			foreach (string root in Conf.Indexing.Roots)
				AddRoot (root);

			Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
		}

		private void OnConfigurationChanged (Conf.Section section)
		{
			ArrayList roots_wanted = new ArrayList (Conf.Indexing.Roots);

			if (Conf.Indexing.IndexHomeDir)
				roots_wanted.Add (PathFinder.HomeDir);

			IList roots_to_add, roots_to_remove;
			ArrayFu.IntersectListChanges (roots_wanted, Roots, out roots_to_add, out roots_to_remove);

			foreach (string root in roots_to_remove)
				RemoveRoot (root);

			foreach (string root in roots_to_add)
				AddRoot (root);
		}

		//////////////////////////////////////////////////////////////////////////

		// Our magic LuceneQueryable hooks

		override protected bool PreChildAddHook (Indexable child)
		{
			// FIXME: Handling Uri remapping of children is tricky, and there
			// is also the issue of properly serializing file: uris that
			// contain fragments.  For now we just punt it all by dropping
			// any child indexables of file system objects.
			return false;
		}

		override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// If we just changed properties, remap to our *old* external Uri
			// to make notification work out properly.
			if (indexable.Type == IndexableType.PropertyChange) {

				string last_known_path;
				last_known_path = (string) indexable.LocalState ["LastKnownPath"];
				receipt.Uri = UriFu.PathToFileUri (last_known_path);
				Logger.Log.Debug ("Last known path is {0}", last_known_path);

				// This rename is now in the index, so we no longer need to keep
				// track of the uid in memory.
				ForgetId (last_known_path);

				return;
			}

			string path;
			path = (string) indexable.LocalState ["Path"];
			ForgetId (path);

			DirectoryModel parent;
			parent = indexable.LocalState ["Parent"] as DirectoryModel;

			// The parent directory might have run away since we were indexed
			if (parent != null && ! parent.IsAttached)
				return;

			Guid unique_id;
			unique_id = GuidFu.FromUri (receipt.Uri);

			FileAttributes attr;
			attr = FileAttributesStore.ReadOrCreate (path, unique_id);
			attr.Path = path;
			attr.LastWriteTime = indexable.Timestamp;

			attr.FilterName = receipt.FilterName;
			attr.FilterVersion = receipt.FilterVersion;

			if (indexable.LocalState ["IsWalkable"] != null) {
				string name;
				name = (string) indexable.LocalState ["Name"];

				RegisterDirectory (name, parent, attr);
			}

			FileAttributesStore.Write (attr);

			// Remap the Uri so that change notification will work properly
			receipt.Uri = UriFu.PathToFileUri (path);
		}

		override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
		{
			// Find the cached external Uri and remap the Uri in the receipt.
			// We have to do this to make change notification work.
			Uri external_uri;
			external_uri = indexable.LocalState ["RemovedUri"] as Uri;
			if (external_uri == null)
				throw new Exception ("No cached external Uri for " + receipt.Uri);
			receipt.Uri = external_uri;
			ForgetId (external_uri.LocalPath);
		}
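
		// Map a hit's internal uid-based uri back to a file:// uri by
		// combining the stored filename with its parent directory's path.
		// The internal uri is kept in a property so that GetSnippet can
		// still find the cached text.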
		private bool RemapUri (Hit hit)
		{
			// Store the hit's internal uri in a property
			Property prop;
			prop = Property.NewUnsearched ("beagle:InternalUri",
						       UriFu.UriToSerializableString (hit.Uri));
			hit.AddProperty (prop);

			// Now assemble the path by looking at the parent and name
			string name, path;
			name = hit [ExactFilenamePropKey];
			if (name == null) {
				// If we don't have the filename property, we have to do a lookup
				// based on the guid.  This happens with synthetic hits produced by
				// index listeners.
				Guid hit_id;
				hit_id = GuidFu.FromUri (hit.Uri);
				path = UniqueIdToFullPath (hit_id);
			} else {
				string parent_id_uri;
				parent_id_uri = hit [ParentDirUriPropKey];
				if (parent_id_uri == null)
					return false;

				Guid parent_id;
				parent_id = GuidFu.FromUriString (parent_id_uri);

				path = ToFullPath (name, parent_id);
				if (path == null)
					Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
							  name, GuidFu.ToShortString (parent_id));
			}

			if (path != null) {
				hit.Uri = UriFu.PathToFileUri (path);
				return true;
			}

			return false;
		}

		// Hit filter: this handles our mapping from internal->external uris,
		// and checks to see if the file is still there.
		override protected bool HitFilter (Hit hit)
		{
			Uri old_uri = hit.Uri;

			if (! RemapUri (hit))
				return false;

			string path;
			path = hit.Uri.LocalPath;

			bool is_directory;
			bool exists = false;

			is_directory = hit.MimeType == "inode/directory";

			if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
				is_directory = true;
				exists = true;
			}

			if (! exists) {
				if (is_directory)
					exists = Directory.Exists (path);
				else
					exists = File.Exists (path);
			}

			// If the file doesn't exist, we do not schedule a removal and
			// return false.  This is to avoid "losing" files if they are
			// in a directory that has been renamed but which we haven't
			// scanned yet... if we dropped them from the index, they would
			// never get re-indexed (or at least not until the next time they
			// were touched) since they would still be stamped with EAs
			// indicating they were up-to-date.  And that would be bad.
			// FIXME: It would be safe if we were in a known state, right?
			// i.e. every DirectoryModel is clean.
			if (! exists)
				return false;

			// Fetch the parent directory model from our cache to do clever
			// filtering to determine if we're ignoring it or not.
			DirectoryModel parent;
			parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

			// Check the ignore status of the hit
			if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
				return false;

			return true;
		}

		override public string GetSnippet (string [] query_terms, Hit hit)
		{
			// Uri remapping from a hit is easy: the internal uri
			// is stored in a property.
			Uri uri;
			uri = UriFu.UriStringToUri (hit ["beagle:InternalUri"]);

			string path;
			path = TextCache.UserCache.LookupPathRaw (uri);

			if (path == null)
				return null;

			// If this is self-cached, use the remapped Uri
			if (path == TextCache.SELF_CACHE_TAG)
				path = hit.Uri.LocalPath;

			return SnippetFu.GetSnippetFromFile (query_terms, path);
		}

		override public void Start ()
		{
			base.Start ();

			event_backend.Start (this);

			LoadConfiguration ();

			Logger.Log.Debug ("Done starting FileSystemQueryable");
		}

		//////////////////////////////////////////////////////////////////////////

		// These are the methods that the IFileEventBackend implementations should
		// call in response to events.

		public void ReportEventInDirectory (string directory_name)
		{
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);

			// If something goes wrong, just fail silently.
			if (dir == null)
				return;

			// We only use this information to prioritize the order in which
			// we crawl directories --- so if this directory doesn't
			// actually need to be crawled, we can safely ignore it.
			if (! dir.NeedsCrawl)
				return;

			dir.LastActivityTime = DateTime.Now;

			Logger.Log.Debug ("Saw event in '{0}'", directory_name);
		}

		public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);
			if (dir == null) {
				Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
				return;
			}

			if (is_directory)
				AddDirectory (dir, file_name);
			else
				AddFile (dir, file_name);
		}

		public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				string path;
				path = Path.Combine (directory_name, file_name);

				DirectoryModel dir;
				dir = GetDirectoryModelByPath (path);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path);
					return;
				}

				dir.WatchHandle = null;
				RemoveDirectory (dir);
			} else {
				DirectoryModel dir;
				dir = GetDirectoryModelByPath (directory_name);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
					return;
				}

				RemoveFile (dir, file_name);
			}
		}

		public void HandleMoveEvent (string old_directory_name, string old_file_name,
					     string new_directory_name, string new_file_name,
					     bool is_directory)
		{
			Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
					  old_directory_name, old_file_name,
					  new_directory_name, new_file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				DirectoryModel dir, new_parent;
				dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
				new_parent = GetDirectoryModelByPath (new_directory_name);
				MoveDirectory (dir, new_parent, new_file_name);
				return;
			} else {
				DirectoryModel old_dir, new_dir;
				old_dir = GetDirectoryModelByPath (old_directory_name);
				new_dir = GetDirectoryModelByPath (new_directory_name);
				MoveFile (old_dir, old_file_name, new_dir, new_file_name);
			}
		}
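
		// Called when the backend's event queue overflows and events may
		// have been dropped; for now we just log it.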
		public void HandleOverflowEvent ()
		{
			Logger.Log.Debug ("Queue overflows suck");
		}
	}
}