* KNotesQueryable.cs: Dont re-index all the notes when the notes file changes. Since...
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blobb22cec668aac7367529b451c4cadeafca46f7c72
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 [PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable : LuceneQueryable {
44 static public bool Debug = false;
46 private const string SplitFilenamePropKey = "beagle:SplitFilename";
47 public const string ExactFilenamePropKey = "beagle:ExactFilename";
48 public const string TextFilenamePropKey = "beagle:Filename";
49 public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
50 public const string FilenameExtensionPropKey = "beagle:FilenameExtension";
51 public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
52 public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";
54 // History:
55 // 1: Initially set to force a reindex due to NameIndex changes.
56 // 2: Overhauled everything to use new lucene infrastructure.
57 // 3: Switched to UTC for all times, changed the properties a bit.
58 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
59 // Make SplitFilenamePropKey unstored
60 // 5: Keyword properties in the private namespace are no longer lower cased; this is required to
61 // offset the change in LuceneCommon.cs
62 const int MINOR_VERSION = 5;
64 private object big_lock = new object ();
66 private IFileEventBackend event_backend;
68 // This is the task that walks the tree structure
69 private TreeCrawlTask tree_crawl_task;
71 // This is the task that finds the next place that
72 // needs to be crawled in the tree and spawns off
73 // the appropriate IndexableGenerator.
74 private FileCrawlTask file_crawl_task;
76 private ArrayList roots = new ArrayList ();
77 private ArrayList roots_by_path = new ArrayList ();
79 private FileNameFilter filter;
81 // This is just a copy of the LuceneQueryable's QueryingDriver
82 // cast into the right type for doing internal->external Uri
83 // lookups.
84 private LuceneNameResolver name_resolver;
86 //////////////////////////////////////////////////////////////////////////
88 private Hashtable cached_uid_by_path = new Hashtable ();
90 //////////////////////////////////////////////////////////////////////////
// Builds the file system backend on top of the "FileSystemIndex"
// lucene index: picks a file event backend, creates the two crawl
// tasks, preloads the directory name information from the index
// and hooks up the directory expiration handler.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Pick the file event backend: inotify if it is enabled,
	// otherwise a null backend.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	}

	// The tree crawler reports the directories it finds
	// through AddDirectory.
	TreeCrawlTask.Handler add_directory_handler;
	add_directory_handler = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (add_directory_handler);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// The name resolver must be in place before the name
	// information can be preloaded.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// Set up our file-name filter
	filter = new FileNameFilter (this);

	// Drop our by-path cache entries when paths expire
	DirectoryModel.ExpireHandler expire_handler;
	expire_handler = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += expire_handler;
}
// Supplies the attribute store for this backend: a
// FileAttributesStore_Mixed created from our index directory
// and index fingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store;
	store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
// Supplies the querying driver for this backend.  We always hand
// back a LuceneNameResolver; the constructor casts it back to
// that type for its name lookups.
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								    int    minor_version,
								    bool   read_only_mode)
{
	LuceneQueryingDriver driver;
	driver = new LuceneNameResolver (index_name, minor_version, read_only_mode);
	return driver;
}
// The file-name filter created in the constructor.
public FileNameFilter Filter {
	get {
		return this.filter;
	}
}
137 //////////////////////////////////////////////////////////////////////////
140 // This is where we build our Indexables
// Attaches the standard filename-derived properties to an indexable:
// the exact name, the name without extension, the name with all
// punctuation blanked out, the lower-cased extension and a
// fuzzily-divided version of the name.  If parent_id is not
// Guid.Empty, the parent directory's uri is attached as well.
// Every property gets its IsMutable flag set to 'mutable'.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string    name,
						     Guid      parent_id,
						     bool      mutable)
{
	string base_name = Path.GetFileNameWithoutExtension (name);
	string extension = Path.GetExtension (name).ToLower ();

	// Replace everything that is not a letter or a digit by a space.
	char [] letters = base_name.ToCharArray ();
	for (int pos = 0; pos < letters.Length; ++pos) {
		if (Char.IsLetterOrDigit (letters [pos]))
			continue;
		letters [pos] = ' ';
	}
	string punctuation_free = new string (letters);

	Property exact_prop = Property.NewKeyword (ExactFilenamePropKey, name);
	exact_prop.IsMutable = mutable;
	indexable.AddProperty (exact_prop);

	Property text_prop = Property.New (TextFilenamePropKey, base_name);
	text_prop.IsMutable = mutable;
	indexable.AddProperty (text_prop);

	Property no_punct_prop = Property.New (NoPunctFilenamePropKey, punctuation_free);
	no_punct_prop.IsMutable = mutable;
	indexable.AddProperty (no_punct_prop);

	Property ext_prop = Property.NewUnsearched (FilenameExtensionPropKey, extension);
	ext_prop.IsMutable = mutable;
	indexable.AddProperty (ext_prop);

	string divided = StringFu.FuzzyDivide (base_name);
	Property split_prop = Property.NewUnstored (SplitFilenamePropKey, divided);
	split_prop.IsMutable = mutable;
	indexable.AddProperty (split_prop);

	// Nothing more to do if there is no parent.
	if (parent_id != Guid.Empty) {
		// The parent is stored as a uri so that terms in the
		// index get recycled: every directory's uri is
		// already indexed.
		string parent_uri = GuidFu.ToUriString (parent_id);
		Property parent_prop = Property.NewUnsearched (ParentDirUriPropKey, parent_uri);
		parent_prop.IsMutable = mutable;
		indexable.AddProperty (parent_prop);
	}
}
// Convenience overload taking the parent as a DirectoryModel
// (or null if there is no parent).  Besides adding the standard
// properties, it remembers the parent model in the indexable's
// local state under "Parent".
public static void AddStandardPropertiesToIndexable (Indexable      indexable,
						     string         name,
						     DirectoryModel parent,
						     bool           mutable)
{
	Guid parent_id;
	if (parent != null)
		parent_id = parent.UniqueId;
	else
		parent_id = Guid.Empty;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
// Builds the Add indexable describing a directory, or returns
// null if an IOException is raised while it is being set up.
// A directory without a parent (i.e. a root) is named by its
// full path; any other directory by the last component of its path.
public static Indexable DirectoryToIndexable (string         path,
					      Guid           id,
					      DirectoryModel parent)
{
	Indexable dir_indexable;

	try {
		dir_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		dir_indexable.MimeType = "inode/directory";
		dir_indexable.NoContent = true;
		dir_indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		// Looks like the directory was deleted.
		return null;
	}

	string display_name = (parent == null) ? path : Path.GetFileName (path);
	AddStandardPropertiesToIndexable (dir_indexable, display_name, parent, true);

	// Mutable, since we want this in the secondary index for efficiency.
	Property is_dir_prop = Property.NewBool (IsDirectoryPropKey, true);
	is_dir_prop.IsMutable = true;
	dir_indexable.AddProperty (is_dir_prop);

	dir_indexable.LocalState ["Path"] = path;

	return dir_indexable;
}
// Builds the Add indexable describing a file, or returns null
// if an IOException is raised while it is being set up.
// crawl_mode is recorded in the indexable's Crawled flag.
public static Indexable FileToIndexable (string         path,
					 Guid           id,
					 DirectoryModel parent,
					 bool           crawl_mode)
{
	Indexable file_indexable;

	try {
		Uri internal_uri = GuidFu.ToUri (id);
		file_indexable = new Indexable (IndexableType.Add, internal_uri);
		file_indexable.Timestamp = File.GetLastWriteTimeUtc (path);
		file_indexable.ContentUri = UriFu.PathToFileUri (path);
		file_indexable.Crawled = crawl_mode;
		file_indexable.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// Looks like the file was deleted.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (file_indexable, file_name, parent, true);

	file_indexable.LocalState ["Path"] = path;

	return file_indexable;
}
// Builds a PropertyChange indexable that carries the standard
// name properties for the item's new name and parent.  The id and
// the last known path travel along in the local state.
private static Indexable NewRenamingIndexable (string         name,
					       Guid           id,
					       DirectoryModel parent,
					       string         last_known_path)
{
	Uri internal_uri = GuidFu.ToUri (id);
	Indexable renaming = new Indexable (IndexableType.PropertyChange, internal_uri);

	AddStandardPropertiesToIndexable (renaming, name, parent, true);

	renaming.LocalState ["Id"] = id;
	renaming.LocalState ["LastKnownPath"] = last_known_path;

	return renaming;
}
283 //////////////////////////////////////////////////////////////////////////
286 // Mapping from directory ids to paths
289 private Hashtable dir_models_by_id = new Hashtable ();
290 private Hashtable name_info_by_id = new Hashtable ();
// Until our set of DirectoryModels has been fully constructed,
// we fall back to the name information stored in the index.
// This loads all of it into name_info_by_id, keyed by id.
private void PreloadDirectoryNameInfo ()
{
	IEnumerable infos = name_resolver.GetAllDirectoryNameInfo ();
	foreach (object item in infos) {
		LuceneNameResolver.NameInfo info = (LuceneNameResolver.NameInfo) item;
		name_info_by_id [info.Id] = info;
	}
}
// Maps a directory's unique id to its full path, or to null if
// the id is unknown.  This only works for directories.
// A registered DirectoryModel wins; otherwise the path is pieced
// together recursively from the cached name information.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// An empty parent id means that this is a root.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	return (parent_path == null) ? null : Path.Combine (parent_path, cached.Name);
}
// Updates the cached name information for a directory (if we
// have any) with its new parent and name.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Combines a name with the path of its parent directory.
// Returns null if the parent's path can't be determined.
private string ToFullPath (string name, Guid parent_id)
{
	// Roots have no parent: their name is their full path.
	if (parent_id == Guid.Empty)
		return name;

	string parent_path = UniqueIdToDirectoryName (parent_id);
	if (parent_path != null)
		return Path.Combine (parent_path, name);

	return null;
}
// Maps a unique id to a full path, or to null if the id can't
// be resolved.  This works for both files and directories.
private string UniqueIdToFullPath (Guid id)
{
	// Directories are tried first.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// Not a known directory, so ask the index for name information.
	LuceneNameResolver.NameInfo from_index = name_resolver.GetNameInfoById (id);
	if (from_index != null)
		return ToFullPath (from_index.Name, from_index.ParentId);

	return null;
}
// Remembers the unique id of the item called 'name' inside 'dir',
// keyed by its full path.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops whatever unique id we have cached for the given path.
private void ForgetId (string path)
{
	this.cached_uid_by_path.Remove (path);
}
// Finds the unique id of the item called 'name' inside 'dir'.
// Our path cache is consulted first, then the index.
// This works for files.  (It probably works for directories
// too, but you should use one of the more efficient means
// above if you know it is a directory.)
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (! cached_uid_by_path.Contains (full_path))
		return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

	return (Guid) cached_uid_by_path [full_path];
}
395 //////////////////////////////////////////////////////////////////////////
398 // Directory-related methods
401 private Hashtable dir_models_by_path = new Hashtable ();
// Finds the DirectoryModel for a path, or returns null if none
// of our roots contains it.  Successful tree walks are cached
// in dir_models_by_path.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Walk each root until we find the correct path
	foreach (DirectoryModel root in roots) {
		DirectoryModel found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path) {
			dir_models_by_path [path] = found;
		}
		return found;
	}

	return null;
}
// Handler for DirectoryModel.ExpireEvent: removes the expired
// path from our by-path cache.  The unique id is not used.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug) {
		Logger.Log.Debug ("Expired '{0}'", expired_path);
	}

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Considers the directory called 'name' inside 'parent' (or the
// root 'name', if parent is null) for inclusion.  Ignored
// directories, directories the parent already knows about and
// directories that don't exist are skipped.  A directory without
// file attributes, or one whose last known name differs from its
// current path, is scheduled for indexing; any other walkable
// directory is registered right away.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// FIXME: ! parent.HasChildWithName (name)
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path;
	path = (parent == null) ? name : Path.Combine (parent.FullName, name);

	// Only the path is logged; the format string has a single placeholder.
	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr;
	attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing = false;
	if (attr == null) {
		// If it has no attributes, it definitely needs
		// indexing.
		needs_indexing = true;
	} else {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.
		// We can do this since we preloaded all of the name
		// info in the directory via PreloadDirectoryNameInfo.
		string last_known_name;
		last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		if (last_known_name != path) {
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
			needs_indexing = true;
		}
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable;
	is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing)
		ScheduleDirectory (name, parent, attr, is_walkable);
	else if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Adds a new root directory.  The path is sanitized first;
// adding a root that is already known is logged as an error.
public void AddRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		// The path has to be in our list of root paths
		// before the root is actually added, so that the
		// filtering works as we'd like.
		roots_by_path.Add (path);

		AddDirectory (null, path);
	} else {
		Logger.Log.Error ("Trying to add an existing root: {0}", path);
	}
}
// Removes a root directory, along with everything we know about
// the directories below it.  Removing an unknown root, or a root
// we have no DirectoryModel for, is logged as an error.
public void RemoveRoot (string path)
{
	// AddRoot stores the sanitized form of the path, so the
	// lookups below have to use the sanitized form too.
	// NOTE(review): this assumes that sanitizing an already
	// sanitized path leaves it unchanged -- confirm.
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
		return;
	}

	// Find our directory model for the root
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Queues a delayed task that indexes the given directory.
// A directory without file attributes gets a brand new unique id
// and a last-crawl time of DateTime.MinValue.  Nothing is
// scheduled if no indexable can be built for the directory.
private void ScheduleDirectory (string         name,
				DirectoryModel parent,
				FileAttributes attr,
				bool           is_walkable)
{
	string path;
	if (parent == null)
		path = name;
	else
		path = Path.Combine (parent.FullName, name);

	Guid id;
	DateTime last_crawl;
	if (attr == null) {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	} else {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	}

	Indexable dir_indexable = DirectoryToIndexable (path, id, parent);
	if (dir_indexable == null)
		return;

	dir_indexable.LocalState ["Name"] = name;
	dir_indexable.LocalState ["LastCrawl"] = last_crawl;
	dir_indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (dir_indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Creates the DirectoryModel for a directory that already has
// file attributes, starts watching it and queues it for crawling.
// Returns false (and does nothing else) if reading the
// directory's mtime raises an IOException, true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	bool is_root = (parent == null);

	string path;
	if (is_root)
		path = name;
	else
		path = Path.Combine (parent.FullName, name);

	if (Debug) {
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);
	}

	DateTime current_mtime;
	try {
		current_mtime = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
		return false;
	}

	DirectoryModel model;
	if (is_root)
		model = DirectoryModel.NewRoot (big_lock, path, attr);
	else
		model = parent.AddChild (name, attr);

	// Modified since the time recorded in its attributes: dirty.
	if (current_mtime > attr.LastWriteTime) {
		model.State = DirectoryState.Dirty;
		if (Debug) {
			Logger.Log.Debug ("'{0}' is dirty", path);
		}
	}

	if (Debug) {
		if (! model.IsRoot)
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", model.FullName, model.Parent.FullName);
		else
			Logger.Log.Debug ("Created model '{0}'", model.FullName);
	}

	// Add any roots we create to the list of roots
	if (model.IsRoot) {
		roots.Add (model);
	}

	// The model supersedes any NameInfo we might have cached:
	// remember it by id and forget the cached information.
	dir_models_by_id [model.UniqueId] = model;
	name_info_by_id.Remove (model.UniqueId);

	// Start watching the directory.
	model.WatchHandle = event_backend.CreateWatch (path);

	// Schedule this directory for crawling.
	bool newly_queued = tree_crawl_task.Add (model);
	if (newly_queued) {
		ThisScheduler.Add (tree_crawl_task);
	}

	// Presumably we now have something new to crawl, so make
	// sure that our file crawling task is active.
	ActivateFileCrawling ();

	return true;
}
// Drops the watch and the by-id reference for a directory and
// for everything below it, children first.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel sub_dir in dir.Children) {
		ForgetDirectoryRecursively (sub_dir);
	}

	object handle = dir.WatchHandle;
	if (handle != null) {
		event_backend.ForgetWatch (dir.WatchHandle);
	}

	// dir_models_by_path is not touched here: we rely on the
	// expire event to clean that up.
	dir_models_by_id.Remove (dir.UniqueId);
}
// Forgets a directory (and everything below it) and queues an
// immediate task that removes it from the index.
private void RemoveDirectory (DirectoryModel dir)
{
	Uri internal_uri = GuidFu.ToUri (dir.UniqueId);
	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);

	// Keep a copy of the external Uri around, so that it is
	// easy to remap in the PostRemoveHook.  This has to happen
	// before the model is removed below.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

	// Forget watches and internal references
	ForgetDirectoryRecursively (dir);

	// Removing the model expires the path names, which in
	// turn cleans up the name caches.
	dir.Remove ();

	// The index item is *removed* by *adding* the indexable.
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Removes the directory at the given path.  Paths that we have
// no DirectoryModel for are silently skipped.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
// Moves and/or renames a directory and queues an immediate task
// that renames it in the index.
//   dir:        the model of the directory being moved; if it is
//               null, the move is treated as an add at the destination
//   new_parent: the new parent, or null if we are just renaming
//   new_name:   the directory's new name
// Throws an Exception if dir is a root.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	if (dir == null) {
		// Without a model for the directory *and* without a new
		// parent, we have no idea where the directory lives, so
		// there is nothing we could add.  (Handing a null parent
		// to AddDirectory would treat new_name as a root.)
		if (new_parent == null) {
			Logger.Log.Warn ("Couldn't find DirectoryModel for directory being renamed to '{0}', and its parent is unknown.",
					 new_name);
			return;
		}

		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name, new_parent.FullName);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);
	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
719 //////////////////////////////////////////////////////////////////////////
722 // This code controls the directory crawl order
// Depth-first walk over 'contender' and everything below it.
// Returns the best crawl candidate seen so far: prev_best, unless
// a directory needing a crawl is found that prev_best compares
// lower than (or prev_best is null).
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	if (contender.NeedsCrawl && (best == null || best.CompareTo (contender) < 0))
		best = contender;

	foreach (DirectoryModel sub_dir in contender.Children) {
		best = StupidWalk (best, sub_dir);
	}

	return best;
}
// Picks the directory to crawl next by walking all of our
// roots.  Returns null if the walk turns up no candidate.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
// Called once a directory has been crawled: stamps its file
// attributes and its model with the current time and marks the
// model as clean.  Detached directories are skipped, and so are
// directories without file attributes.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (dir.IsAttached) {
		FileAttributes dir_attr = FileAttributesStore.Read (dir.FullName);

		// No attributes: don't mark ourselves, let the crawler redo us
		if (dir_attr != null) {
			// No need to be super-careful here: the mtime in a
			// directory's FileAttributes only determines its
			// initial state, not whether or not its index
			// record is up-to-date.
			dir_attr.LastWriteTime = DateTime.UtcNow;

			// ...but this one does decide the order in which
			// directories get crawled.
			dir.LastCrawlTime = DateTime.UtcNow;

			FileAttributesStore.Write (dir_attr);
			dir.MarkAsClean ();
		}
	}
}
// Marks an attached directory as uncrawlable, dropping its
// watch first if it has one.  Detached directories are skipped.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (dir.IsAttached) {
		// Drop the watch, if we managed to set one up.
		if (dir.WatchHandle != null) {
			event_backend.ForgetWatch (dir.WatchHandle);
			dir.WatchHandle = null;
		}

		dir.MarkAsUncrawlable ();
	}
}
// Re-crawls the directory at the given path.  If we have no
// DirectoryModel for the path itself, the model for its
// containing directory is queued instead; the recursive state
// reset is only done when the path itself has a model.
public void Recrawl (string path)
{
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		// Fall back to the containing directory.
		string containing_dir = FileSystem.GetDirectoryNameRootOk (path);
		target = GetDirectoryModelByPath (containing_dir);

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	bool newly_queued = tree_crawl_task.Add (target);
	if (newly_queued) {
		ThisScheduler.Add (tree_crawl_task);
	}

	if (path_is_registered) {
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Resets every directory below every root to PossiblyClean,
// queues them for tree crawling and makes sure that both crawl
// tasks are active.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root_dir in roots) {
		Recrawl_Recursive (root_dir, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Sets the state of a directory and of everything below it,
// and hands each of them to the tree crawl task.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// Hands the file crawl task to the scheduler, unless the task
// is already active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Hands the tree crawl task to the scheduler, unless the task
// is already active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
851 //////////////////////////////////////////////////////////////////////////
854 // File-related methods
// What has to be done with a file that the crawler came across.
private enum RequiredAction {
	// Leave the file alone.
	None,

	// (Re-)index the file.
	Index,

	// The file is in the index under another name: rename it there.
	Rename,

	// The file is ignored now, but has attributes: drop them.
	Forget
}
864 static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);
// Converts a number of seconds since the epoch (1970-01-01 00:00:00)
// into a DateTime.
static DateTime ToDateTimeUtc (long time_t)
{
	DateTime converted = epoch.AddSeconds (time_t);
	return converted;
}
// Decides what the crawler has to do with the file called 'name'
// inside 'dir', given the file attributes we have stored for it
// (or null, if there are none).
//
// Returns:
//   Forget - the file is ignored, but has attributes
//   None   - the file is ignored, can't be stat-ed or is up-to-date
//   Index  - the file has no attributes, a newer version of its
//            filter is available, its mtime has changed, or its ctime
//            is newer and its last known path can't be determined
//   Rename - the ctime is newer and the path differs from the last
//            known one, while the mtime is unchanged
//
// last_known_path is only set on the mtime and ctime code paths;
// it is null otherwise.  If the mtime has changed and the path
// doesn't match the last known one, attr.UniqueId is replaced by
// a new id as a side effect.
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
						string         name,
						FileAttributes attr,
						out string     last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously.  Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take in to account that we might have a better matching filter to use now
	// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	int stat_result;
	try {
		stat_result = Mono.Unix.Native.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug (ex, "Caught exception stat-ing {0}", path);
		return RequiredAction.None;
	}

	// stat reports failure through its return value, not through
	// an exception.  The times in the struct are meaningless in
	// that case, so don't base any decision on them.
	if (stat_result != 0) {
		Logger.Log.Debug ("Unable to stat {0}", path);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = ToDateTimeUtc (stat.st_mtime);
	last_attr_time = ToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

		// If the file has been copied, it will have the
		// original file's EAs.  Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file.  If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved.  We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
// Return an indexable that will do the right thing with a file
// that the crawler came across (or null, if the right thing is
// to do nothing).  Files that have to be forgotten get their
// file attributes dropped here, and null is returned for them.
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string full_path = Path.Combine (dir.FullName, name);
	FileAttributes file_attr = FileAttributesStore.Read (full_path);

	string last_known_path;
	RequiredAction what_to_do;
	what_to_do = DetermineRequiredAction (dir, name, file_attr, out last_known_path);

	if (what_to_do == RequiredAction.None)
		return null;

	// The id is read only now, since determining the required
	// action can replace the id stored in the attributes.
	Guid unique_id = (file_attr != null) ? file_attr.UniqueId : Guid.NewGuid ();

	if (what_to_do == RequiredAction.Index)
		return FileToIndexable (full_path, unique_id, dir, true);

	if (what_to_do == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (what_to_do == RequiredAction.Forget)
		FileAttributesStore.Drop (full_path);

	return null;
}
// Queues an immediate task that indexes the file called 'name'
// inside 'dir'.  Nothing happens for files that don't exist,
// are special files or are ignored by our filter.
public void AddFile (DirectoryModel dir, string name)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (! File.Exists (full_path))
		return;

	if (FileSystem.IsSpecialFile (full_path))
		return;

	if (filter.Ignore (dir, name, false))
		return;

	// A file that already carries attributes only keeps its
	// unique id if the index knows that id under this very name
	// and parent.  Otherwise it could be a copy of an
	// already-indexed file, and has to get a new unique id.
	Guid file_id = Guid.Empty;
	FileAttributes file_attr = FileAttributesStore.Read (full_path);
	if (file_attr != null) {
		LuceneNameResolver.NameInfo indexed;
		indexed = name_resolver.GetNameInfoById (file_attr.UniqueId);

		bool same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (same_file)
			file_id = file_attr.UniqueId;
	}

	if (file_id == Guid.Empty)
		file_id = Guid.NewGuid ();

	RegisterId (name, dir, file_id);

	Indexable file_indexable = FileToIndexable (full_path, file_id, dir, false);
	if (file_indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (file_indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
// Queues an immediate task that removes the file called 'name'
// inside 'dir' from the index.  If the file's unique id can't
// be resolved, this is logged and nothing is queued.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignored.
	// Right?

	Guid file_id = NameAndParentToId (name, dir);
	if (file_id == Guid.Empty) {
		Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
				 name, dir.FullName);
		return;
	}

	Uri internal_uri = GuidFu.ToUri (file_id);
	string full_path = Path.Combine (dir.FullName, name);
	Uri external_uri = UriFu.PathToFileUri (full_path);

	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);
	removal.LocalState ["RemovedUri"] = external_uri;

	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Handle a file moving from (old_dir, old_name) to (new_dir, new_name).
//
// The filter's ignore state of both locations decides what happens:
//   - both ignored:            nothing to do
//   - ignored -> not ignored:  treated as an add of the new location
//   - not ignored -> ignored:  treated as a removal of the old location
//   - neither ignored:         a renaming indexable is scheduled, or an
//                              add if the old location has no known id
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// RemoveFile resolves the unique id from a name and a parent,
		// and the file was only ever recorded under its old name and
		// old directory.  Asking for the new location would find
		// nothing and leave the stale entry behind.
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an Add.
		AddFile (new_dir, new_name);
		return;
	}

	// Record the id under the new location before dropping the
	// record for the old path.
	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we see
	// events in a directory that has not been fully scanned, just to
	// avoid races.  i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet?  Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1169 //////////////////////////////////////////////////////////////////////////
1171 // Configuration stuff
// Read-only view of the roots_by_path list.
// NOTE(review): OnConfigurationChanged diffs this against a list of
// root path strings, so the entries are presumably paths --- confirm.
public IList Roots {
	get { return roots_by_path; }
}
// Add the roots named by the indexing configuration (the home
// directory first, when enabled, then every configured root) and
// subscribe to later changes of that configuration section.
private void LoadConfiguration ()
{
	if (Conf.Indexing.IndexHomeDir) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration callback: bring the current roots in line with the
// roots the indexing configuration now asks for.  The 'section'
// argument is not used.
private void OnConfigurationChanged (Conf.Section section)
{
	// Everything the configuration wants: the explicit roots, plus
	// the home directory when home-dir indexing is enabled.
	ArrayList wanted = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir) {
		wanted.Add (PathFinder.HomeDir);
	}

	IList missing_roots, stale_roots;
	ArrayFu.IntersectListChanges (wanted, Roots, out missing_roots, out stale_roots);

	// Removals are applied before additions.
	foreach (string stale in stale_roots) {
		RemoveRoot (stale);
	}

	foreach (string missing in missing_roots) {
		AddRoot (missing);
	}
}
1207 //////////////////////////////////////////////////////////////////////////
1210 // Our magic LuceneQueryable hooks
// Always answers false, for every child indexable.
//
// FIXME: Remapping the Uris of children is tricky, and file: uris
// that contain fragments also need to be serialized properly.  Until
// that is sorted out we punt and drop every child indexable of a
// file system object.
override protected bool PreChildAddHook (Indexable child)
{
	return false;
}
// Runs after an indexable has been added.  Forgets the in-memory id
// for the path, stores updated file attributes, and rewrites the
// receipt's Uri to a file: uri so that change notification works.
override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	if (indexable.Type == IndexableType.PropertyChange) {
		// A property change reports under the *old* external Uri
		// so that notification works out properly.
		string previous_path = (string) indexable.LocalState ["LastKnownPath"];
		receipt.Uri = UriFu.PathToFileUri (previous_path);
		Logger.Log.Debug ("Last known path is {0}", previous_path);

		// The rename has reached the index, so the uid does not
		// have to be tracked in memory any longer.
		ForgetId (previous_path);
		return;
	}

	string indexed_path = (string) indexable.LocalState ["Path"];
	ForgetId (indexed_path);

	// The parent directory may have been detached while this item
	// was being indexed; in that case nothing more is recorded.
	DirectoryModel parent_dir = indexable.LocalState ["Parent"] as DirectoryModel;
	if (parent_dir != null && ! parent_dir.IsAttached)
		return;

	Guid id = GuidFu.FromUri (receipt.Uri);

	FileAttributes attr = FileAttributesStore.ReadOrCreate (indexed_path, id);
	attr.Path = indexed_path;
	attr.LastWriteTime = indexable.Timestamp;
	attr.FilterName = receipt.FilterName;
	attr.FilterVersion = receipt.FilterVersion;

	// Items flagged "IsWalkable" are registered as directories
	// first; when that registration is refused, the attributes are
	// not written and the receipt keeps its internal Uri.
	bool is_walkable = indexable.LocalState ["IsWalkable"] != null;
	if (is_walkable) {
		string dir_name = (string) indexable.LocalState ["Name"];
		if (! RegisterDirectory (dir_name, parent_dir, attr))
			return;
	}

	FileAttributesStore.Write (attr);

	// Hand a file: uri back to the caller so that change
	// notification works properly.
	receipt.Uri = UriFu.PathToFileUri (indexed_path);
}
// Runs after an indexable has been removed.  Swaps the receipt's Uri
// for the external Uri cached under "RemovedUri" (needed for change
// notification) and forgets the id kept for that path.
//
// Throws an Exception when the indexable carries no cached Uri.
override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	Uri removed_uri = indexable.LocalState ["RemovedUri"] as Uri;

	if (removed_uri != null) {
		receipt.Uri = removed_uri;
		ForgetId (removed_uri.LocalPath);
		return;
	}

	throw new Exception ("No cached external Uri for " + receipt.Uri);
}
// Replace the hit's internal uri with a file: uri.
//
// The internal uri is first preserved in the "beagle:InternalUri"
// property.  Returns true when a path could be worked out and
// hit.Uri was rewritten, false otherwise.
private bool RemapUri (Hit hit)
{
	// Keep the internal uri around as an unsearched property.
	hit.AddProperty (Property.NewUnsearched ("beagle:InternalUri",
						 UriFu.UriToEscapedString (hit.Uri)));

	string resolved_path;
	string file_name = hit [ExactFilenamePropKey];

	if (file_name != null) {
		// Usual case: build the path from the name and the parent.
		string parent_uri = hit [ParentDirUriPropKey];
		if (parent_uri == null)
			return false;

		Guid parent_guid = GuidFu.FromUriString (parent_uri);

		resolved_path = ToFullPath (file_name, parent_guid);
		if (resolved_path == null)
			Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
					  file_name, GuidFu.ToShortString (parent_guid));
	} else {
		// Without a filename property (this happens with synthetic
		// hits produced by index listeners) the only handle is the
		// guid embedded in the uri.
		resolved_path = UniqueIdToFullPath (GuidFu.FromUri (hit.Uri));
	}

	if (resolved_path == null)
		return false;

	hit.Uri = UriFu.PathToFileUri (resolved_path);
	return true;
}
// Hit filter: this handles our mapping from internal->external uris,
// and checks to see if the file is still there.
//
// Returns true when the hit should be kept: its uri could be remapped,
// the file or directory still exists on disk, and the filter does not
// ignore it.
override protected bool HitFilter (Hit hit)
{
	if (! RemapUri (hit))
		return false;

	string path;
	path = hit.Uri.LocalPath;

	bool is_directory;
	bool exists = false;

	is_directory = hit.MimeType == "inode/directory";

	// A hit without a mime type is treated as a directory when a
	// directory exists at its path.
	if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
		is_directory = true;
		exists = true;
	}

	if (! exists) {
		if (is_directory)
			exists = Directory.Exists (path);
		else
			exists = File.Exists (path);
	}

	// If the file doesn't exist, we do not schedule a removal and
	// return false.  This is to avoid "losing" files if they are
	// in a directory that has been renamed but which we haven't
	// scanned yet... if we dropped them from the index, they would
	// never get re-indexed (or at least not until the next time they
	// were touched) since they would still be stamped with EAs
	// indicating they were up-to-date.  And that would be bad.
	// FIXME: It would be safe if we were in a known state, right?
	// i.e. every DirectoryModel is clean.
	if (! exists)
		return false;

	// Fetch the parent directory model from our cache to do clever
	// filtering to determine if we're ignoring it or not.
	DirectoryModel parent;
	parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

	// Check the ignore status of the hit
	if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
		return false;

	return true;
}
// Produce a snippet for 'hit' matching 'query_terms'.
//
// The text cache is consulted with the hit's internal uri, which
// RemapUri stored in the "beagle:InternalUri" property.  Returns null
// when the text cache has no entry for that uri.
override public string GetSnippet (string [] query_terms, Hit hit)
{
	Uri internal_uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);

	string cache_path = TextCache.UserCache.LookupPathRaw (internal_uri);
	if (cache_path == null)
		return null;

	if (cache_path != TextCache.SELF_CACHE_TAG)
		return SnippetFu.GetSnippetFromTextCache (query_terms, cache_path);

	// Self-cached: read the snippet from the file at the remapped Uri.
	return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);
}
// Start the queryable: run the base class start-up, start the file
// event backend with this instance, then load the configured roots.
override public void Start ()
{
	base.Start ();

	// The backend is started before any root is added.
	event_backend.Start (this);
	LoadConfiguration ();

	Logger.Log.Debug ("Done starting FileSystemQueryable");
}
1411 //////////////////////////////////////////////////////////////////////////
1413 // These are the methods that the IFileEventBackend implementations should
1414 // call in response to events.
// Note that something happened inside 'directory_name'.
//
// The information is only used to prioritize the order in which
// directories are crawled, so unknown directories and directories
// that do not need a crawl are silently skipped.
public void ReportEventInDirectory (string directory_name)
{
	DirectoryModel model = GetDirectoryModelByPath (directory_name);

	// Fail silently on an unknown directory; a directory that does
	// not need to be crawled can safely be ignored.
	if (model == null || ! model.NeedsCrawl)
		return;

	model.LastActivityTime = DateTime.Now;

	Logger.Log.Debug ("Saw event in '{0}'", directory_name);
}
// Backend entry point: 'file_name' appeared inside 'directory_name'.
// Dispatches to AddDirectory or AddFile depending on 'is_directory'.
// A warning is logged and nothing is added when the containing
// directory has no DirectoryModel.
public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
{
	string kind = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name, kind);

	DirectoryModel container = GetDirectoryModelByPath (directory_name);
	if (container == null) {
		Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
		return;
	}

	if (! is_directory) {
		AddFile (container, file_name);
		return;
	}

	AddDirectory (container, file_name);
}
// Backend entry point: 'file_name' disappeared from 'directory_name'.
//
// For a file, the model of the containing directory is looked up and
// the file is removed from it.  For a directory, the model of the
// removed directory itself is looked up, its watch handle is cleared
// and the directory is removed.  A missing model is logged as a
// warning and the event is dropped.
public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
{
	string kind = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name, kind);

	if (! is_directory) {
		DirectoryModel container = GetDirectoryModelByPath (directory_name);
		if (container == null) {
			Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
			return;
		}

		RemoveFile (container, file_name);
		return;
	}

	string removed_path = Path.Combine (directory_name, file_name);

	DirectoryModel removed_dir = GetDirectoryModelByPath (removed_path);
	if (removed_dir == null) {
		Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", removed_path);
		return;
	}

	removed_dir.WatchHandle = null;
	RemoveDirectory (removed_dir);
}
// Backend entry point: an item moved from (old_directory_name,
// old_file_name) to (new_directory_name, new_file_name).  Looks up
// the directory models involved and dispatches to MoveDirectory or
// MoveFile.
//
// NOTE(review): unlike HandleAddEvent and HandleRemoveEvent, the
// looked-up models are passed on without a null check --- confirm
// that MoveDirectory and MoveFile cope with a missing model.
public void HandleMoveEvent (string old_directory_name, string old_file_name,
			     string new_directory_name, string new_file_name,
			     bool is_directory)
{
	string kind = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
			  old_directory_name, old_file_name,
			  new_directory_name, new_file_name,
			  kind);

	if (! is_directory) {
		DirectoryModel source_dir = GetDirectoryModelByPath (old_directory_name);
		DirectoryModel target_dir = GetDirectoryModelByPath (new_directory_name);
		MoveFile (source_dir, old_file_name, target_dir, new_file_name);
		return;
	}

	// For a directory the model being moved is the directory itself,
	// so it is looked up under its full old path.
	string moved_path = Path.Combine (old_directory_name, old_file_name);
	DirectoryModel moved_dir = GetDirectoryModelByPath (moved_path);
	DirectoryModel target_parent = GetDirectoryModelByPath (new_directory_name);
	MoveDirectory (moved_dir, target_parent, new_file_name);
}
1507 public void HandleOverflowEvent ()
1509 Logger.Log.Debug ("Queue overflows suck");