* beagled/QueryDriver.cs,beagled/PropertyKeywordFu.cs,
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blobb520c036c23be72d35d3f9a1646337c2604fda15
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 [PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable : LuceneQueryable {
44 static public bool Debug = false;
46 private const string SplitFilenamePropKey = "beagle:SplitFilename";
47 public const string ExactFilenamePropKey = "beagle:ExactFilename";
48 public const string TextFilenamePropKey = "beagle:Filename";
49 public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
50 public const string FilenameExtensionPropKey = "beagle:FilenameExtension";
51 public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
52 public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";
54 // History:
55 // 1: Initially set to force a reindex due to NameIndex changes.
56 // 2: Overhauled everything to use new lucene infrastructure.
57 // 3: Switched to UTC for all times, changed the properties a bit.
58 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
59 // Make SplitFilenamePropKey unstored
60 const int MINOR_VERSION = 4;
62 private object big_lock = new object ();
64 private IFileEventBackend event_backend;
66 // This is the task that walks the tree structure
67 private TreeCrawlTask tree_crawl_task;
69 // This is the task that finds the next place that
70 // needs to be crawled in the tree and spawns off
71 // the appropriate IndexableGenerator.
72 private FileCrawlTask file_crawl_task;
74 private ArrayList roots = new ArrayList ();
75 private ArrayList roots_by_path = new ArrayList ();
77 private FileNameFilter filter;
79 // This is just a copy of the LuceneQueryable's QueryingDriver
80 // cast into the right type for doing internal->external Uri
81 // lookups.
82 private LuceneNameResolver name_resolver;
84 //////////////////////////////////////////////////////////////////////////
86 private Hashtable cached_uid_by_path = new Hashtable ();
88 //////////////////////////////////////////////////////////////////////////
// Sets up the event backend, the two crawl tasks, the name resolver,
// the file-name filter and the path-expiry hook for the "FileSystemIndex" index.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Pick the file-event backend: inotify when it is enabled,
	// otherwise a backend that does nothing.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	}

	// The tree-crawl task reports each directory it finds to AddDirectory.
	TreeCrawlTask.Handler on_directory = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (on_directory);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// The resolver must be in place before the name info is preloaded,
	// since PreloadDirectoryNameInfo reads through it.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// File-name filter used to decide what to ignore.
	filter = new FileNameFilter (this);

	// Drop cached path lookups whenever a directory path expires.
	DirectoryModel.ExpireHandler on_expire = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += on_expire;
}
// Supplies the attribute store for this backend: a FileAttributesStore_Mixed
// built from IndexDirectory and IndexFingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
// Supplies a LuceneNameResolver as the querying driver; the constructor
// later casts Driver back to that type for name lookups.
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								   int minor_version,
								   bool read_only_mode)
{
	LuceneQueryingDriver resolver = new LuceneNameResolver (index_name, minor_version, read_only_mode);
	return resolver;
}
// The file-name filter created in the constructor.
public FileNameFilter Filter {
	get {
		return filter;
	}
}
135 //////////////////////////////////////////////////////////////////////////
138 // This is where we build our Indexables
// Attaches the standard file-name properties to an indexable:
// exact name, name without extension, name with punctuation blanked out,
// lower-cased extension, the fuzzily-divided name and, unless parent_id
// is Guid.Empty, the uri of the parent directory.
// Every property added gets IsMutable set to the 'mutable' argument.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string name,
						     Guid parent_id,
						     bool mutable)
{
	string base_name = Path.GetFileNameWithoutExtension (name);
	string extension = Path.GetExtension (name).ToLower ();

	// Replace everything that is not a letter or a digit by a space.
	StringBuilder scrubbed = new StringBuilder ();
	scrubbed.Append (base_name);
	int pos = 0;
	while (pos < scrubbed.Length) {
		if (! Char.IsLetterOrDigit (scrubbed [pos]))
			scrubbed [pos] = ' ';
		++pos;
	}
	string punctuation_free = scrubbed.ToString ();

	Property p;

	p = Property.NewKeyword (ExactFilenamePropKey, name);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	p = Property.New (TextFilenamePropKey, base_name);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	p = Property.New (NoPunctFilenamePropKey, punctuation_free);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	p = Property.NewUnsearched (FilenameExtensionPropKey, extension);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	string divided = StringFu.FuzzyDivide (base_name);
	p = Property.NewUnstored (SplitFilenamePropKey, divided);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	// Without a parent there is no parent-uri property to add.
	if (parent_id != Guid.Empty) {
		// The uri is used to recycle terms in the index, since
		// each directory's uri will already be indexed.
		string parent_uri = GuidFu.ToUriString (parent_id);
		p = Property.NewUnsearched (ParentDirUriPropKey, parent_uri);
		p.IsMutable = mutable;
		indexable.AddProperty (p);
	}
}
// Convenience overload: takes the parent as a DirectoryModel (null means
// "no parent"), adds the standard properties, and remembers the parent
// model in the indexable's LocalState under "Parent".
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string name,
						     DirectoryModel parent,
						     bool mutable)
{
	Guid parent_id;
	if (parent != null)
		parent_id = parent.UniqueId;
	else
		parent_id = Guid.Empty;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
// Builds the "add" indexable describing a directory.
// Returns null when reading the directory's timestamp raises an IOException.
// A directory without a parent (a root) is named by its full path.
public static Indexable DirectoryToIndexable (string path,
					      Guid id,
					      DirectoryModel parent)
{
	Indexable dir_indexable;
	try {
		dir_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		dir_indexable.MimeType = "inode/directory";
		dir_indexable.NoContent = true;
		dir_indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		// The directory seems to have been deleted in the meantime.
		return null;
	}

	string display_name = (parent != null) ? Path.GetFileName (path) : path;
	AddStandardPropertiesToIndexable (dir_indexable, display_name, parent, true);

	// Mutable so that it ends up in the secondary index, for efficiency.
	Property is_dir = Property.NewBool (IsDirectoryPropKey, true);
	is_dir.IsMutable = true;
	dir_indexable.AddProperty (is_dir);

	dir_indexable.LocalState ["Path"] = path;

	return dir_indexable;
}
// Builds the "add" indexable describing a regular file.
// Returns null when setting it up raises an IOException.
public static Indexable FileToIndexable (string path,
					 Guid id,
					 DirectoryModel parent,
					 bool crawl_mode)
{
	Indexable file_indexable;

	try {
		file_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		file_indexable.Timestamp = File.GetLastWriteTimeUtc (path);
		file_indexable.ContentUri = UriFu.PathToFileUri (path);
		file_indexable.Crawled = crawl_mode;
		file_indexable.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// The file seems to have been deleted in the meantime.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (file_indexable, file_name, parent, true);

	file_indexable.LocalState ["Path"] = path;

	return file_indexable;
}
// Builds a property-change indexable carrying the new name/parent of an
// already indexed item. The id and the previous path travel along in
// LocalState under "Id" and "LastKnownPath".
private static Indexable NewRenamingIndexable (string name,
					       Guid id,
					       DirectoryModel parent,
					       string last_known_path)
{
	Indexable rename = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

	AddStandardPropertiesToIndexable (rename, name, parent, true);

	rename.LocalState ["Id"] = id;
	rename.LocalState ["LastKnownPath"] = last_known_path;

	return rename;
}
281 //////////////////////////////////////////////////////////////////////////
284 // Mapping from directory ids to paths
287 private Hashtable dir_models_by_id = new Hashtable ();
288 private Hashtable name_info_by_id = new Hashtable ();
290 // We fall back to using the name information in the index
291 // until we've fully constructed our set of DirectoryModels.
// Fills name_info_by_id with every directory NameInfo the resolver knows,
// keyed by the info's Id.
private void PreloadDirectoryNameInfo ()
{
	ICollection known_dirs = name_resolver.GetAllDirectoryNameInfo ();
	foreach (LuceneNameResolver.NameInfo entry in known_dirs) {
		name_info_by_id [entry.Id] = entry;
	}
}
300 // This only works for directories.
// Resolves a directory id to its full path, or null if it is unknown.
// A live DirectoryModel wins; otherwise the preloaded NameInfo is used and
// the path is assembled by recursing through the parent ids.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// An empty parent id marks a root, whose name is already its path.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	if (parent_path == null)
		return null;

	return Path.Combine (parent_path, cached.Name);
}
// Updates the cached NameInfo (if there is one) for a directory that has
// been renamed and/or re-parented.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Joins a name with the path of its parent directory.
// Returns the name unchanged for roots (empty parent id) and null when
// the parent's path cannot be resolved.
private string ToFullPath (string name, Guid parent_id)
{
	if (parent_id == Guid.Empty)
		return name;

	string parent_path = UniqueIdToDirectoryName (parent_id);
	return (parent_path == null) ? null : Path.Combine (parent_path, name);
}
349 // This works for both files and directories.
// Resolves an id (file or directory) to a full path, or null if unknown.
private string UniqueIdToFullPath (Guid id)
{
	// Directories can be answered from the in-memory tables.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// Anything else has to be looked up in the index.
	LuceneNameResolver.NameInfo from_index = name_resolver.GetNameInfoById (id);
	if (from_index != null)
		return ToFullPath (from_index.Name, from_index.ParentId);

	return null;
}
// Remembers the id of dir/name in the path-to-id cache.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops a path from the path-to-id cache.
private void ForgetId (string path)
{
	Hashtable cache = cached_uid_by_path;
	cache.Remove (path);
}
376 // This works for files. (It probably works for directories
377 // too, but you should use one of the more efficient means
378 // above if you know it is a directory.)
// Finds the id of dir/name: first in the path-to-id cache, then by asking
// the name resolver with the name and the parent directory's id.
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (cached_uid_by_path.Contains (full_path))
		return (Guid) cached_uid_by_path [full_path];

	return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
}
393 //////////////////////////////////////////////////////////////////////////
396 // Directory-related methods
399 private Hashtable dir_models_by_path = new Hashtable ();
// Returns the DirectoryModel for a path, or null if no root contains it.
// Hits are memoized in dir_models_by_path, which is guarded by its own lock.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Not cached: ask each root to walk down to the path.
	foreach (DirectoryModel root in roots) {
		DirectoryModel found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path)
			dir_models_by_path [path] = found;
		return found;
	}

	return null;
}
// DirectoryModel.ExpireEvent handler: forgets the memoized model for a
// path that is no longer valid.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug) {
		Logger.Log.Debug ("Expired '{0}'", expired_path);
	}

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Starts tracking the directory 'name' inside 'parent' (parent == null
// means 'name' is a root path). Ignored, already-known and non-existent
// directories are skipped. Depending on its stored attributes the
// directory is either scheduled for indexing or, if it can be walked,
// registered right away.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Skip whatever the filter tells us to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// Nothing to do if the parent already has this child.
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path, name);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr = FileAttributesStore.Read (path);

	// The directory's mtime plays no part in deciding whether to index it.
	bool needs_indexing;
	if (attr != null) {
		// Re-index if the directory no longer goes by the name we last
		// knew it under. This lookup works because all directory name
		// info was preloaded by PreloadDirectoryNameInfo.
		string last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		needs_indexing = (last_known_name != path);
		if (needs_indexing)
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
	} else {
		// No attributes at all: it has never been indexed.
		needs_indexing = true;
	}

	// A directory we cannot descend into still gets indexed, but no
	// DirectoryModel is built for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing) {
		ScheduleDirectory (name, parent, attr, is_walkable);
	} else if (is_walkable) {
		RegisterDirectory (name, parent, attr);
	}
}
// Adds a new crawl root. The path is sanitized first; adding a root that
// is already present is reported as an error and otherwise ignored.
public void AddRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", path);

	bool already_known = roots_by_path.Contains (path);
	if (already_known) {
		Logger.Log.Error ("Trying to add an existing root: {0}", path);
		return;
	}

	// The path has to be in the list of root paths before the
	// directory itself is added, so that filtering behaves as
	// intended while the root is being set up.
	roots_by_path.Add (path);

	AddDirectory (null, path);
}
// Removes a crawl root: drops it from the root lists and removes its
// directory model (and thereby its index entry).
// The path is sanitized exactly like AddRoot does, because AddRoot stores
// the sanitized form; without this a path such as one with a trailing
// separator would never match the stored root.
public void RemoveRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
		return;
	}

	// Find our directory model for the root
	DirectoryModel dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Queues a delayed-priority task that indexes a directory. A directory
// without stored attributes gets a fresh id and a "never crawled" time.
// Nothing is queued when the indexable cannot be built.
private void ScheduleDirectory (string name,
				DirectoryModel parent,
				FileAttributes attr,
				bool is_walkable)
{
	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	Guid id;
	DateTime last_crawl;
	if (attr != null) {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	} else {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	}

	Indexable dir_indexable = DirectoryToIndexable (path, id, parent);
	if (dir_indexable == null)
		return;

	// Stash what the later stages need alongside the indexable.
	dir_indexable.LocalState ["Name"] = name;
	dir_indexable.LocalState ["LastCrawl"] = last_crawl;
	dir_indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (dir_indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Creates the DirectoryModel for a directory that already has attributes,
// hooks it into the lookup tables, starts watching it and queues it for
// crawling. Returns false if the directory's mtime cannot be read
// (IOException), true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

	DateTime mtime;
	try {
		mtime = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
		return false;
	}

	// Roots get a model of their own; everything else hangs off its parent.
	DirectoryModel model;
	if (parent != null)
		model = parent.AddChild (name, attr);
	else
		model = DirectoryModel.NewRoot (big_lock, path, attr);

	// Modified since the attributes were written: needs another look.
	if (mtime > attr.LastWriteTime) {
		model.State = DirectoryState.Dirty;
		if (Debug)
			Logger.Log.Debug ("'{0}' is dirty", path);
	}

	if (Debug) {
		if (model.IsRoot)
			Logger.Log.Debug ("Created model '{0}'", model.FullName);
		else
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", model.FullName, model.Parent.FullName);
	}

	// Keep track of every root we create.
	if (model.IsRoot)
		roots.Add (model);

	// The model supersedes any NameInfo cached for the same id.
	dir_models_by_id [model.UniqueId] = model;
	name_info_by_id.Remove (model.UniqueId);

	// Start watching the directory.
	model.WatchHandle = event_backend.CreateWatch (path);

	// Queue the directory for tree crawling.
	if (tree_crawl_task.Add (model))
		ThisScheduler.Add (tree_crawl_task);

	// There is presumably something new to crawl now, so make sure
	// the file crawl task is running.
	ActivateFileCrawling ();

	return true;
}
// Depth-first: drops the watch and the by-id entry of a directory and of
// everything below it. The by-path cache is cleaned up by the expire event.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel sub_dir in dir.Children) {
		ForgetDirectoryRecursively (sub_dir);
	}

	object watch = dir.WatchHandle;
	if (watch != null) {
		event_backend.ForgetWatch (dir.WatchHandle);
	}

	dir_models_by_id.Remove (dir.UniqueId);
}
// Removes a directory: forgets watches and internal references for the
// whole subtree, detaches the model, and queues an immediate task that
// removes the directory's entry from the index.
private void RemoveDirectory (DirectoryModel dir)
{
	Indexable removal = new Indexable (IndexableType.Remove, GuidFu.ToUri (dir.UniqueId));

	// Keep the external uri around so it can be remapped easily
	// in the PostRemoveHook.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

	// Forget watches and internal references.
	ForgetDirectoryRecursively (dir);

	// Remove expires the path names, which in turn cleans up
	// the name caches.
	dir.Remove ();

	// The removal is expressed as an indexable that is *added* to the queue.
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Removes the directory at 'path' if a model for it exists; otherwise
// does nothing.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
// Renames and/or re-parents a directory and queues an immediate
// property-change task for it.
//   dir        - the model being moved; if null, the move is treated as
//                the appearance of a new directory under new_parent.
//   new_parent - the new parent, or null if the directory is only renamed.
//   new_name   - the directory's new name.
// Throws if asked to move a root.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	if (dir == null) {
		// Without a model *and* without a parent there is nothing to
		// attach the directory to; new_parent must not be dereferenced here.
		if (new_parent == null) {
			Logger.Log.Warn ("Couldn't find DirectoryModel for directory being renamed to '{0}', and its parent is unknown; ignoring.",
					 new_name);
			return;
		}

		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name, new_parent.FullName);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);
	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
716 //////////////////////////////////////////////////////////////////////////
719 // This code controls the directory crawl order
// Walks 'contender' and its whole subtree and returns the best crawl
// candidate seen so far: a directory that needs crawling replaces the
// current best when there is none yet or when the current best compares
// lower than it.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	if (contender.NeedsCrawl && (best == null || best.CompareTo (contender) < 0))
		best = contender;

	foreach (DirectoryModel sub_dir in contender.Children) {
		best = StupidWalk (best, sub_dir);
	}

	return best;
}
// Picks the next directory to crawl across all roots, or null if no
// directory needs crawling.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
// Called when a directory has been crawled: stamps its attributes and its
// model with the current UTC time and marks the model clean. Detached
// directories and directories without stored attributes are left alone.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	FileAttributes dir_attr = FileAttributesStore.Read (dir.FullName);

	// No attributes: don't mark the directory, so the crawler redoes it.
	if (dir_attr == null)
		return;

	// No need to be overly careful with this value: a directory's
	// attribute mtime only determines its initial state, not whether
	// its index record is up-to-date.
	dir_attr.LastWriteTime = DateTime.UtcNow;

	// The crawl time, on the other hand, influences the order in
	// which directories get crawled.
	dir.LastCrawlTime = DateTime.UtcNow;

	FileAttributesStore.Write (dir_attr);
	dir.MarkAsClean ();
}
// Flags an attached directory as uncrawlable, dropping its watch first
// if one had been set up.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (dir.IsAttached) {
		if (dir.WatchHandle != null) {
			event_backend.ForgetWatch (dir.WatchHandle);
			dir.WatchHandle = null;
		}

		dir.MarkAsUncrawlable ();
	}
}
// Re-crawls the directory at 'path'. If there is no model for the path
// itself, the model of its containing directory is crawled instead (and
// no recursive state reset is done). Gives up if neither can be found.
public void Recrawl (string path)
{
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		// Fall back to the directory that contains the path.
		target = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	if (tree_crawl_task.Add (target))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Resets every directory under every root to PossiblyClean, queues them
// for tree crawling and makes sure both crawl tasks are scheduled.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root in roots) {
		Recrawl_Recursive (root, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Sets the given state on a directory and its whole subtree, handing each
// directory to the tree crawl task along the way.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// Schedules the file crawl task unless it is already active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Schedules the tree crawl task unless it is already active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
848 //////////////////////////////////////////////////////////////////////////
851 // File-related methods
// What has to happen to a file the crawler has just examined.
private enum RequiredAction {
	// Leave the file alone.
	None,
	// Build an "add" indexable for the file.
	Index,
	// Build a renaming (property-change) indexable for the file.
	Rename,
	// Drop the attributes stored for the file.
	Forget
}
// Midnight, January 1st 1970: the origin of time_t values.
static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

// Converts a count of seconds since the epoch into a DateTime.
static DateTime ToDateTimeUtc (long time_t)
{
	DateTime converted = epoch.AddSeconds (time_t);
	return converted;
}
// Decides what the crawler has to do with the file 'name' in 'dir'.
//   attr            - the attributes stored for the file, or null if none.
//   last_known_path - set to the path the file's unique id last resolved
//                     to, in the branches that look it up; null otherwise.
// Returns:
//   Forget - the file is ignored by the filter but still has attributes;
//   None   - the file is ignored, cannot be stat-ed, or is up to date;
//   Index  - no attributes, a newer filter version, a changed mtime, or a
//            changed ctime with no resolvable last known path;
//   Rename - ctime is newer and the path differs from the last known one.
// Side effect: when the mtime changed and the path differs from the last
// known path, attr.UniqueId is replaced by a fresh Guid.
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
						string name,
						FileAttributes attr,
						out string last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously. Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take into account that we might have a better
	// matching filter to use now. That, however, is expensive to figure
	// out since it would require mime-sniffing the file.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	try {
		// stat reports failure through its return value rather than by
		// throwing. On failure the struct holds no usable times, so
		// comparing against it would wrongly look like an mtime change.
		if (Mono.Unix.Native.Syscall.stat (path, out stat) != 0) {
			Logger.Log.Debug ("Unable to stat {0}", path);
			return RequiredAction.None;
		}
	} catch (Exception ex) {
		Logger.Log.Debug ("Caught exception stat-ing {0}", path);
		Logger.Log.Debug (ex);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = ToDateTimeUtc (stat.st_mtime);
	last_attr_time = ToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

		// If the file has been copied, it will have the
		// original file's EAs. Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file. If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved. We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
982 // Return an indexable that will do the right thing with a file
983 // (or null, if the right thing is to do nothing)
// Produces the indexable the crawler should submit for dir/name, or null
// when there is nothing to submit (no action needed, the file's
// attributes were just dropped, or the indexable could not be built).
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (path);

	string last_known_path;
	RequiredAction action = DetermineRequiredAction (dir, name, attr, out last_known_path);

	if (action == RequiredAction.None)
		return null;

	// Read the id only now: DetermineRequiredAction may have replaced it.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	if (action == RequiredAction.Index)
		return FileToIndexable (path, unique_id, dir, true);

	if (action == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (action == RequiredAction.Forget)
		FileAttributesStore.Drop (path);

	return null;
}
// Handles a newly appeared file: works out its unique id, caches it by
// path and queues an immediate task that indexes the file. Missing and
// ignored files are skipped.
public void AddFile (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);

	if (! File.Exists (path) || filter.Ignore (dir, name, false))
		return;

	// A file that already carries attributes keeps its id only if the
	// index knows that id under this very name and parent. Otherwise
	// it could be a copy of an already-indexed file and has to get
	// an id of its own.
	Guid unique_id = Guid.Empty;
	FileAttributes attr = FileAttributesStore.Read (path);
	if (attr != null) {
		LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (attr.UniqueId);
		bool is_same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (is_same_file)
			unique_id = attr.UniqueId;
	}

	if (unique_id == Guid.Empty)
		unique_id = Guid.NewGuid ();

	RegisterId (name, dir, unique_id);

	Indexable file_indexable = FileToIndexable (path, unique_id, dir, false);
	if (file_indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (file_indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
// Handles a vanished file: resolves its unique id and queues an immediate
// task removing it from the index. If the id cannot be resolved the file
// is assumed to be gone already.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignored.
	// Right?

	Guid unique_id = NameAndParentToId (name, dir);
	if (unique_id == Guid.Empty) {
		Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
				 name, dir.FullName);
		return;
	}

	Uri internal_uri = GuidFu.ToUri (unique_id);
	Uri external_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);
	removal.LocalState ["RemovedUri"] = external_uri;

	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Handles a file moving from old_dir/old_name to new_dir/new_name.
//   - ignored before and after: nothing to do;
//   - ignored before only: treated as an add of the new location;
//   - ignored after only: treated as a removal of the *old* location,
//     since that is the name/parent the index knows the file under;
//   - otherwise the file's id is carried over to the new path and an
//     immediate renaming task is queued. If the id of the old location
//     cannot be resolved, the move is treated as an add.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// RemoveFile resolves the id from the name and parent, and the
		// indexed entry is the one at the old location.
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an Add.
		AddFile (new_dir, new_name);
		return;
	}

	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we see
	// events in a directory that has not been fully scanned, just to
	// avoid races. i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet? Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1164 //////////////////////////////////////////////////////////////////////////
1166 // Configuration stuff
// Read-only access to roots_by_path.  OnConfigurationChanged compares
// this list against the roots requested by the configuration.
public IList Roots {
	get { return roots_by_path; }
}
// Adds every root requested by the indexing configuration (the home
// directory first, if enabled, then each explicitly configured root)
// and subscribes to later changes of that configuration section.
private void LoadConfiguration ()
{
	if (Conf.Indexing.IndexHomeDir) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration-change callback: works out which roots have to be
// dropped and which have to be added so that the active roots match
// the configuration again, then applies removals before additions.
private void OnConfigurationChanged (Conf.Section section)
{
	// The set of roots the configuration is asking for right now.
	ArrayList desired_roots = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir) {
		desired_roots.Add (PathFinder.HomeDir);
	}

	IList added, removed;
	ArrayFu.IntersectListChanges (desired_roots, Roots, out added, out removed);

	foreach (string stale_root in removed) {
		RemoveRoot (stale_root);
	}

	foreach (string fresh_root in added) {
		AddRoot (fresh_root);
	}
}
1202 //////////////////////////////////////////////////////////////////////////
1205 // Our magic LuceneQueryable hooks
// Always returns false, regardless of the child passed in.
override protected bool PreChildAddHook (Indexable child)
{
	// FIXME: Remapping the Uris of children is tricky, and file: uris
	// that contain fragments are not trivially serializable either.
	// Until that is sorted out we punt and drop every child indexable
	// of a file system object.
	bool accept_child = false;
	return accept_child;
}
// Called with the indexable and its receipt after an add.  Rewrites the
// receipt's Uri to an external file: uri, forgets the in-memory id for
// the path, and (for non-property-change adds) writes the file's
// attributes back to the FileAttributesStore.
override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// If we just changed properties, remap to our *old* external Uri
	// to make notification work out properly.
	if (indexable.Type == IndexableType.PropertyChange) {
		string previous_path = (string) indexable.LocalState ["LastKnownPath"];
		receipt.Uri = UriFu.PathToFileUri (previous_path);
		Logger.Log.Debug ("Last known path is {0}", previous_path);

		// The rename has reached the index, so the uid no longer
		// has to be tracked in memory.
		ForgetId (previous_path);
		return;
	}

	string file_path = (string) indexable.LocalState ["Path"];
	ForgetId (file_path);

	// The parent directory might have run away since we were indexed
	DirectoryModel parent_dir = indexable.LocalState ["Parent"] as DirectoryModel;
	if (parent_dir != null && ! parent_dir.IsAttached)
		return;

	Guid file_id = GuidFu.FromUri (receipt.Uri);

	FileAttributes attributes = FileAttributesStore.ReadOrCreate (file_path, file_id);
	attributes.Path = file_path;
	attributes.LastWriteTime = indexable.Timestamp;
	attributes.FilterName = receipt.FilterName;
	attributes.FilterVersion = receipt.FilterVersion;

	// Indexables flagged "IsWalkable" also have to be registered as
	// directories; if that registration fails we stop here without
	// writing attributes or remapping the Uri.
	bool is_walkable = indexable.LocalState ["IsWalkable"] != null;
	if (is_walkable
	    && ! RegisterDirectory ((string) indexable.LocalState ["Name"], parent_dir, attributes))
		return;

	FileAttributesStore.Write (attributes);

	// Remap the Uri so that change notification will work properly
	receipt.Uri = UriFu.PathToFileUri (file_path);
}
// Called with the indexable and its receipt after a removal.  Swaps the
// receipt's Uri for the external Uri cached under "RemovedUri" in the
// indexable's LocalState (needed for change notification) and forgets
// the id associated with that path.  Throws if no Uri was cached.
override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	Uri cached_uri = indexable.LocalState ["RemovedUri"] as Uri;

	if (cached_uri != null) {
		receipt.Uri = cached_uri;
		ForgetId (cached_uri.LocalPath);
		return;
	}

	throw new Exception ("No cached external Uri for " + receipt.Uri);
}
// Rewrites hit.Uri from the internal uri to an external file: uri.
// The internal uri is preserved on the hit as the "beagle:InternalUri"
// property.  Returns true if a path could be determined and the Uri was
// rewritten, false otherwise.
private bool RemapUri (Hit hit)
{
	// Remember the internal uri before we overwrite it.
	hit.AddProperty (Property.NewUnsearched ("beagle:InternalUri",
						 UriFu.UriToSerializableString (hit.Uri)));

	// Work out the path, preferably from the name and parent stored
	// on the hit itself.
	string resolved_path;
	string file_name = hit [ExactFilenamePropKey];

	if (file_name != null) {
		string parent_uri_string = hit [ParentDirUriPropKey];
		if (parent_uri_string == null)
			return false;

		Guid parent_guid = GuidFu.FromUriString (parent_uri_string);

		resolved_path = ToFullPath (file_name, parent_guid);
		if (resolved_path == null)
			Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
					  file_name, GuidFu.ToShortString (parent_guid));
	} else {
		// Without the filename property we have to look the path
		// up by guid.  This happens with synthetic hits produced
		// by index listeners.
		resolved_path = UniqueIdToFullPath (GuidFu.FromUri (hit.Uri));
	}

	if (resolved_path == null)
		return false;

	hit.Uri = UriFu.PathToFileUri (resolved_path);
	return true;
}
1324 // Hit filter: this handles our mapping from internal->external uris,
1325 // and checks to see if the file is still there.
override protected bool HitFilter (Hit hit)
{
	// Returns true if the hit should be kept: its Uri could be
	// remapped, the file or directory still exists on disk, and the
	// filter does not ignore it.

	if (! RemapUri (hit))
		return false;

	string path;
	path = hit.Uri.LocalPath;

	bool is_directory;
	bool exists = false;

	is_directory = hit.MimeType == "inode/directory";

	// A hit without a mime type may still be a directory; a
	// successful Directory.Exists answers both questions at once.
	if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
		is_directory = true;
		exists = true;
	}

	if (! exists) {
		if (is_directory)
			exists = Directory.Exists (path);
		else
			exists = File.Exists (path);
	}

	// If the file doesn't exist, we do not schedule a removal and
	// return false.  This is to avoid "losing" files if they are
	// in a directory that has been renamed but which we haven't
	// scanned yet... if we dropped them from the index, they would
	// never get re-indexed (or at least not until the next time they
	// were touched) since they would still be stamped with EAs
	// indicating they were up-to-date.  And that would be bad.
	// FIXME: It would be safe if we were in a known state, right?
	// i.e. every DirectoryModel is clean.
	if (! exists)
		return false;

	// Fetch the parent directory model from our cache to do clever
	// filtering to determine if we're ignoring it or not.
	DirectoryModel parent;
	parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

	// Check the ignore status of the hit
	if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
		return false;

	return true;
}
// Produces a snippet for the hit via SnippetFu.GetSnippetFromFile.
// Returns null when the text cache has no entry for the hit's
// internal uri.
override public string GetSnippet (string [] query_terms, Hit hit)
{
	// The internal uri was stored on the hit as a property, so
	// mapping back from the hit is a simple lookup.
	Uri internal_uri = UriFu.UriStringToUri (hit ["beagle:InternalUri"]);

	string cache_path = TextCache.UserCache.LookupPathRaw (internal_uri);
	if (cache_path == null)
		return null;

	// A self-cached entry means the text lives in the file itself,
	// which we reach through the remapped Uri.
	string snippet_source;
	if (cache_path == TextCache.SELF_CACHE_TAG)
		snippet_source = hit.Uri.LocalPath;
	else
		snippet_source = cache_path;

	return SnippetFu.GetSnippetFromFile (query_terms, snippet_source);
}
// Starts the queryable: base class first, then the file event backend,
// then the configured roots are loaded.
override public void Start ()
{
	base.Start ();

	// The backend is started before any roots are added by
	// LoadConfiguration.
	event_backend.Start (this);
	LoadConfiguration ();

	Logger.Log.Debug ("Done starting FileSystemQueryable");
}
1408 //////////////////////////////////////////////////////////////////////////
1410 // These are the methods that the IFileEventBackend implementations should
1411 // call in response to events.
// Records that activity was seen in the named directory by stamping its
// DirectoryModel with the current time.  Unknown directories and
// directories that do not need crawling are silently skipped.
public void ReportEventInDirectory (string directory_name)
{
	DirectoryModel model = GetDirectoryModelByPath (directory_name);

	// A missing model means something went wrong: fail silently.
	// The timestamp is only used to prioritize the crawl order, so a
	// directory that doesn't need crawling can safely be ignored too.
	if (model == null || ! model.NeedsCrawl)
		return;

	model.LastActivityTime = DateTime.Now;

	Logger.Log.Debug ("Saw event in '{0}'", directory_name);
}
// Backend callback for a newly appeared file or directory.  Looks up
// the DirectoryModel of the containing directory and dispatches to
// AddDirectory or AddFile; warns and gives up if the model is unknown.
public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
{
	string kind_label = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name, kind_label);

	DirectoryModel containing_dir = GetDirectoryModelByPath (directory_name);
	if (containing_dir == null) {
		Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
		return;
	}

	if (is_directory) {
		AddDirectory (containing_dir, file_name);
	} else {
		AddFile (containing_dir, file_name);
	}
}
// Backend callback for a removed file or directory.  For a directory
// the model of the removed directory itself is looked up, its watch
// handle is cleared and it is passed to RemoveDirectory; for a file the
// model of the containing directory is looked up and passed to
// RemoveFile.  Warns and gives up if the needed model is unknown.
public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
{
	string kind_label = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name, kind_label);

	// Directories are resolved by their own path, files by the path
	// of the directory that contains them.
	string lookup_path;
	if (is_directory)
		lookup_path = Path.Combine (directory_name, file_name);
	else
		lookup_path = directory_name;

	DirectoryModel model = GetDirectoryModelByPath (lookup_path);
	if (model == null) {
		Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", lookup_path);
		return;
	}

	if (is_directory) {
		model.WatchHandle = null;
		RemoveDirectory (model);
	} else {
		RemoveFile (model, file_name);
	}
}
// Backend callback for a move/rename.  Resolves the DirectoryModels
// involved and forwards to MoveDirectory (for directories) or MoveFile
// (for files).  The looked-up models are passed on as-is, without null
// checks.
public void HandleMoveEvent (string old_directory_name, string old_file_name,
			     string new_directory_name, string new_file_name,
			     bool is_directory)
{
	string kind_label = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
			  old_directory_name, old_file_name,
			  new_directory_name, new_file_name,
			  kind_label);

	if (! is_directory) {
		// Files: both the old and the new containing directory
		// are needed.
		DirectoryModel source_dir = GetDirectoryModelByPath (old_directory_name);
		DirectoryModel target_dir = GetDirectoryModelByPath (new_directory_name);
		MoveFile (source_dir, old_file_name, target_dir, new_file_name);
		return;
	}

	// Directories: look up the moved directory itself (by its old
	// path) and the directory it now lives in.
	string old_path = Path.Combine (old_directory_name, old_file_name);
	DirectoryModel moved_dir = GetDirectoryModelByPath (old_path);
	DirectoryModel target_parent = GetDirectoryModelByPath (new_directory_name);
	MoveDirectory (moved_dir, target_parent, new_file_name);
}
1504 public void HandleOverflowEvent ()
1506 Logger.Log.Debug ("Queue overflows suck");