Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blob5c0f767620d535a3920ae4fcaeac4c7719d3af74
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 [PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable : LuceneQueryable {
44 static public bool Debug = false;
46 private const string SplitFilenamePropKey = "beagle:SplitFilename";
47 public const string ExactFilenamePropKey = "beagle:ExactFilename";
48 public const string TextFilenamePropKey = "beagle:Filename";
49 public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
50 public const string FilenameExtensionPropKey = "beagle:FilenameExtension";
51 public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
52 public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";
54 // History:
55 // 1: Initially set to force a reindex due to NameIndex changes.
56 // 2: Overhauled everything to use new lucene infrastructure.
57 // 3: Switched to UTC for all times, changed the properties a bit.
58 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
59 // Make SplitFilenamePropKey unstored
60 const int MINOR_VERSION = 4;
62 private object big_lock = new object ();
64 private IFileEventBackend event_backend;
66 // This is the task that walks the tree structure
67 private TreeCrawlTask tree_crawl_task;
69 // This is the task that finds the next place that
70 // needs to be crawled in the tree and spawns off
71 // the appropriate IndexableGenerator.
72 private FileCrawlTask file_crawl_task;
74 private ArrayList roots = new ArrayList ();
75 private ArrayList roots_by_path = new ArrayList ();
77 private FileNameFilter filter;
79 // This is just a copy of the LuceneQueryable's QueryingDriver
80 // cast into the right type for doing internal->external Uri
81 // lookups.
82 private LuceneNameResolver name_resolver;
84 //////////////////////////////////////////////////////////////////////////
86 private Hashtable cached_uid_by_path = new Hashtable ();
88 //////////////////////////////////////////////////////////////////////////
// Sets up the file system backend: picks a file event backend, creates
// the tree- and file-crawling tasks, preloads the directory name info
// from the index, builds the file-name filter and subscribes to
// directory path expiration.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Use inotify when it is enabled; otherwise fall back to
	// the null file event backend.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	}

	// The tree crawler hands every directory it finds to AddDirectory.
	TreeCrawlTask.Handler add_handler;
	add_handler = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (add_handler);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// The name resolver must be in place before we preload,
	// since the preload reads through it.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// Our file-name filter
	filter = new FileNameFilter (this);

	// Drop cached path -> model mappings when paths expire
	DirectoryModel.ExpireHandler expire_handler;
	expire_handler = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += expire_handler;
}
// Supplies the attribute store for this backend: a
// FileAttributesStore_Mixed built from IndexDirectory and IndexFingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store;
	store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
// Supplies the querying driver for this backend. We hand back a
// LuceneNameResolver, which the constructor later casts Driver to.
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								   int    minor_version,
								   bool   read_only_mode)
{
	LuceneQueryingDriver driver;
	driver = new LuceneNameResolver (index_name, minor_version, read_only_mode);
	return driver;
}
// The file-name filter created in the constructor (read-only).
public FileNameFilter Filter {
	get {
		return this.filter;
	}
}
135 //////////////////////////////////////////////////////////////////////////
138 // This is where we build our Indexables
// Attaches the standard file-name properties to an indexable:
// the exact name, the name without extension, a punctuation-free
// variant, the lower-cased extension and a fuzzily divided name.
// If parent_id is not Guid.Empty, the parent directory's uri is
// attached as well.  Every property gets IsMutable = mutable.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string    name,
						     Guid      parent_id,
						     bool      mutable)
{
	string base_name = Path.GetFileNameWithoutExtension (name);
	string extension = Path.GetExtension (name).ToLower ();

	// Blank out everything that is not a letter or a digit.
	StringBuilder scrubbed = new StringBuilder ();
	scrubbed.Append (base_name);
	int pos = 0;
	while (pos < scrubbed.Length) {
		if (! Char.IsLetterOrDigit (scrubbed [pos]))
			scrubbed [pos] = ' ';
		++pos;
	}
	string without_punct = scrubbed.ToString ();

	Property exact_prop = Property.NewKeyword (ExactFilenamePropKey, name);
	exact_prop.IsMutable = mutable;
	indexable.AddProperty (exact_prop);

	Property text_prop = Property.New (TextFilenamePropKey, base_name);
	text_prop.IsMutable = mutable;
	indexable.AddProperty (text_prop);

	Property no_punct_prop = Property.New (NoPunctFilenamePropKey, without_punct);
	no_punct_prop.IsMutable = mutable;
	indexable.AddProperty (no_punct_prop);

	Property ext_prop = Property.NewUnsearched (FilenameExtensionPropKey, extension);
	ext_prop.IsMutable = mutable;
	indexable.AddProperty (ext_prop);

	string divided = StringFu.FuzzyDivide (base_name);
	Property split_prop = Property.NewUnstored (SplitFilenamePropKey, divided);
	split_prop.IsMutable = mutable;
	indexable.AddProperty (split_prop);

	if (parent_id != Guid.Empty) {
		// We use the uri here to recycle terms in the index,
		// since each directory's uri will already be indexed.
		string parent_uri = GuidFu.ToUriString (parent_id);
		Property parent_prop = Property.NewUnsearched (ParentDirUriPropKey, parent_uri);
		parent_prop.IsMutable = mutable;
		indexable.AddProperty (parent_prop);
	}
}
// Convenience overload taking the parent as a DirectoryModel.  A null
// parent is passed on as Guid.Empty.  The parent model (possibly null)
// is also stored in the indexable's LocalState under "Parent".
public static void AddStandardPropertiesToIndexable (Indexable      indexable,
						     string         name,
						     DirectoryModel parent,
						     bool           mutable)
{
	Guid parent_id;
	if (parent != null)
		parent_id = parent.UniqueId;
	else
		parent_id = Guid.Empty;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
// Builds the Add indexable describing a directory, or returns null
// if an IOException is raised while reading the directory's mtime.
// For a root (parent == null) the full path serves as the name.
public static Indexable DirectoryToIndexable (string         path,
					      Guid           id,
					      DirectoryModel parent)
{
	Indexable dir_indexable;
	try {
		dir_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		dir_indexable.MimeType = "inode/directory";
		dir_indexable.NoContent = true;
		dir_indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		// Looks like the directory was deleted.
		return null;
	}

	string display_name = (parent != null) ? Path.GetFileName (path) : path;
	AddStandardPropertiesToIndexable (dir_indexable, display_name, parent, true);

	// We want this in the secondary index, for efficiency,
	// hence the mutable flag.
	Property is_dir_prop = Property.NewBool (IsDirectoryPropKey, true);
	is_dir_prop.IsMutable = true;
	dir_indexable.AddProperty (is_dir_prop);

	dir_indexable.LocalState ["Path"] = path;

	return dir_indexable;
}
// Builds the Add indexable describing a file, or returns null if an
// IOException is raised while setting it up.  crawl_mode is recorded
// in the indexable's Crawled flag.
public static Indexable FileToIndexable (string         path,
					 Guid           id,
					 DirectoryModel parent,
					 bool           crawl_mode)
{
	Indexable file_indexable;

	try {
		file_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		file_indexable.Timestamp = File.GetLastWriteTimeUtc (path);
		file_indexable.ContentUri = UriFu.PathToFileUri (path);
		file_indexable.Crawled = crawl_mode;
		file_indexable.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// Looks like the file was deleted.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (file_indexable, file_name, parent, true);

	file_indexable.LocalState ["Path"] = path;

	return file_indexable;
}
// Builds a PropertyChange indexable that carries the new name and
// parent of an item.  The id and the last known path are stashed
// in LocalState under "Id" and "LastKnownPath".
private static Indexable NewRenamingIndexable (string         name,
					       Guid           id,
					       DirectoryModel parent,
					       string         last_known_path)
{
	Uri internal_uri = GuidFu.ToUri (id);
	Indexable renaming = new Indexable (IndexableType.PropertyChange, internal_uri);

	AddStandardPropertiesToIndexable (renaming, name, parent, true);

	renaming.LocalState ["Id"] = id;
	renaming.LocalState ["LastKnownPath"] = last_known_path;

	return renaming;
}
281 //////////////////////////////////////////////////////////////////////////
284 // Mapping from directory ids to paths
287 private Hashtable dir_models_by_id = new Hashtable ();
288 private Hashtable name_info_by_id = new Hashtable ();
// Until our set of DirectoryModels is fully constructed, we fall
// back on the name information stored in the index.  This loads
// all of it into name_info_by_id, keyed by id.
private void PreloadDirectoryNameInfo ()
{
	foreach (LuceneNameResolver.NameInfo name_info in name_resolver.GetAllDirectoryNameInfo ())
		name_info_by_id [name_info.Id] = name_info;
}
// Maps a directory's unique id to its full path, or null if unknown.
// This only works for directories.  A live DirectoryModel wins; failing
// that, the path is assembled recursively from the preloaded name info.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// An empty parent id means this is a root.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	if (parent_path == null)
		return null;

	return Path.Combine (parent_path, cached.Name);
}
// Updates the preloaded name info for a directory (if we still hold
// any for that id) with its new parent and name.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Joins a name with the path of its parent directory.  Returns the
// name itself for roots (parent_id == Guid.Empty), and null if the
// parent's path cannot be determined.
private string ToFullPath (string name, Guid parent_id)
{
	// This is the correct behavior for roots.
	if (parent_id == Guid.Empty)
		return name;

	string parent_path = UniqueIdToDirectoryName (parent_id);
	return (parent_path == null) ? null : Path.Combine (parent_path, name);
}
// Maps a unique id to a full path, or null if it cannot be resolved.
// This works for both files and directories.
private string UniqueIdToFullPath (Guid id)
{
	// Directories can be resolved from what we hold in memory.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// Otherwise, try to pull name information out of the index.
	LuceneNameResolver.NameInfo from_index = name_resolver.GetNameInfoById (id);
	if (from_index != null)
		return ToFullPath (from_index.Name, from_index.ParentId);

	return null;
}
// Remembers the unique id of the item 'name' inside 'dir', keyed by
// its full path.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops whatever unique id we have cached for the given path.
private void ForgetId (string path)
{
	this.cached_uid_by_path.Remove (path);
}
// Resolves the unique id of 'name' inside 'dir': first from the
// path-keyed cache, then by asking the name resolver.
// This works for files.  (It probably works for directories too,
// but the directory-specific lookups are more efficient if you
// know it is a directory.)
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (! cached_uid_by_path.Contains (full_path))
		return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

	return (Guid) cached_uid_by_path [full_path];
}
393 //////////////////////////////////////////////////////////////////////////
396 // Directory-related methods
399 private Hashtable dir_models_by_path = new Hashtable ();
// Finds the DirectoryModel for a path, or null if no root knows it.
// Hits are cached in dir_models_by_path; access to that table is
// guarded by locking on it.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Walk each root until we find the correct path
	foreach (DirectoryModel root in roots) {
		DirectoryModel found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path)
			dir_models_by_path [path] = found;
		return found;
	}

	return null;
}
// Handler for DirectoryModel.ExpireEvent: removes the expired path
// from the path -> model cache.  The unique id is not used here.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug)
		Logger.Log.Debug ("Expired '{0}'", expired_path);

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Starts tracking the directory 'name' under 'parent' (null parent
// means 'name' is the full path of a root).  Directories that have
// no stored attributes, or whose stored id resolves to a different
// path, are scheduled for indexing; otherwise walkable directories
// are registered directly.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// FIXME: ! parent.HasChildWithName (name)
	if (parent != null && parent.HasChildWithName (name))
		return;

	string full_path;
	if (parent != null)
		full_path = Path.Combine (parent.FullName, name);
	else
		full_path = name;

	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", full_path, name);

	if (! Directory.Exists (full_path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", full_path);
		return;
	}

	FileAttributes attr = FileAttributesStore.Read (full_path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool must_index;
	if (attr != null) {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.  We can check this
		// because all of the directory name info was preloaded
		// via PreloadDirectoryNameInfo.
		string previous_path = UniqueIdToDirectoryName (attr.UniqueId);
		must_index = (previous_path != full_path);
		if (must_index)
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", previous_path, full_path);
	} else {
		// No attributes at all: it definitely needs indexing.
		must_index = true;
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool can_walk = DirectoryWalker.IsWalkable (full_path);
	if (! can_walk)
		Logger.Log.Debug ("Can't walk '{0}'", full_path);

	if (must_index) {
		ScheduleDirectory (name, parent, attr, can_walk);
		return;
	}

	if (can_walk)
		RegisterDirectory (name, parent, attr);
}
// Registers a new crawl root.  The path is sanitized first; adding
// a root that is already known is logged as an error and ignored.
public void AddRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", path);

	bool already_known = roots_by_path.Contains (path);
	if (already_known) {
		Logger.Log.Error ("Trying to add an existing root: {0}", path);
		return;
	}

	// The path has to be in roots_by_path before the root is
	// actually added, for the filtering to work as we'd like.
	roots_by_path.Add (path);

	AddDirectory (null, path);
}
// Unregisters a crawl root and removes its directory tree from our
// caches and from the index.  Unknown roots, and roots for which no
// directory model can be found, are logged as errors and ignored.
public void RemoveRoot (string path)
{
	// AddRoot stores the sanitized form of the path, so we have to
	// look up the same form here; otherwise a root passed with a
	// different (unsanitized) spelling could never be removed.
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
		return;
	}

	// Find our directory model for the root
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Queues a delayed-priority task that (re)indexes a directory.  The
// stored unique id is reused when attributes exist; otherwise a new
// one is generated.  Nothing is scheduled if no indexable could be
// built for the directory.
private void ScheduleDirectory (string         name,
				DirectoryModel parent,
				FileAttributes attr,
				bool           is_walkable)
{
	string full_path;
	if (parent != null)
		full_path = Path.Combine (parent.FullName, name);
	else
		full_path = name;

	Guid dir_id;
	DateTime previous_crawl;
	if (attr != null) {
		dir_id = attr.UniqueId;
		previous_crawl = attr.LastWriteTime;
	} else {
		dir_id = Guid.NewGuid ();
		previous_crawl = DateTime.MinValue;
	}

	Indexable dir_indexable = DirectoryToIndexable (full_path, dir_id, parent);
	if (dir_indexable == null)
		return;

	dir_indexable.LocalState ["Name"] = name;
	dir_indexable.LocalState ["LastCrawl"] = previous_crawl;
	dir_indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (dir_indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Creates the DirectoryModel for a directory that has stored
// attributes, starts watching it and queues it for crawling.
// Returns false if an IOException is raised while reading the
// directory's mtime, and true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	string full_path;
	if (parent != null)
		full_path = Path.Combine (parent.FullName, name);
	else
		full_path = name;

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", full_path, attr.UniqueId);

	DateTime disk_mtime;

	try {
		disk_mtime = Directory.GetLastWriteTimeUtc (full_path);
	} catch (IOException) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", full_path, attr.UniqueId);
		return false;
	}

	DirectoryModel model;
	if (parent != null)
		model = parent.AddChild (name, attr);
	else
		model = DirectoryModel.NewRoot (big_lock, full_path, attr);

	// The directory was written to after the time recorded in
	// its attributes, so it has to be considered dirty.
	if (disk_mtime > attr.LastWriteTime) {
		model.State = DirectoryState.Dirty;
		if (Debug)
			Logger.Log.Debug ("'{0}' is dirty", full_path);
	}

	if (Debug) {
		if (! model.IsRoot)
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", model.FullName, model.Parent.FullName);
		else
			Logger.Log.Debug ("Created model '{0}'", model.FullName);
	}

	// Add any roots we create to the list of roots
	if (model.IsRoot)
		roots.Add (model);

	// From now on the model is the authority for this id, so the
	// NameInfo we might have cached about it is no longer needed.
	dir_models_by_id [model.UniqueId] = model;
	name_info_by_id.Remove (model.UniqueId);

	// Start watching the directory.
	model.WatchHandle = event_backend.CreateWatch (full_path);

	// Schedule this directory for crawling.
	bool newly_queued = tree_crawl_task.Add (model);
	if (newly_queued)
		ThisScheduler.Add (tree_crawl_task);

	// Make sure that our file crawling task is active,
	// since presumably we now have something new to crawl.
	ActivateFileCrawling ();

	return true;
}
// Drops the watch and the by-id entry for a directory and, first,
// for everything below it.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel sub_dir in dir.Children) {
		ForgetDirectoryRecursively (sub_dir);
	}

	object handle = dir.WatchHandle;
	if (handle != null)
		event_backend.ForgetWatch (dir.WatchHandle);

	// We rely on the expire event to remove it from dir_models_by_path
	dir_models_by_id.Remove (dir.UniqueId);
}
// Removes a directory: forgets its watches and internal references,
// detaches the model and queues an immediate task that removes the
// directory's item from the index.
private void RemoveDirectory (DirectoryModel dir)
{
	Uri internal_uri = GuidFu.ToUri (dir.UniqueId);
	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);

	// Remember a copy of our external Uri, so that we can
	// easily remap it in the PostRemoveHook.  This must happen
	// before the model is removed below.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

	// Forget watches and internal references
	ForgetDirectoryRecursively (dir);

	// Calling Remove will expire the path names,
	// so name caches will be cleaned up accordingly.
	dir.Remove ();

	// We *add* the indexable to *remove* the index item
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Removes the directory at the given path, if we have a model for it.
// Paths without a model are silently ignored.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
// Moves and/or renames a directory and queues an immediate task that
// updates its name properties in the index.
//
// dir:        the model being moved; if null, the move is treated as
//             an add of new_name under new_parent.
// new_parent: the new parent, or null if we are just renaming.
// new_name:   the directory's new name.
//
// Throws an Exception when asked to move a root.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	if (dir == null) {
		// new_parent is allowed to be null (plain rename).  In that
		// case we have neither a model nor a parent to add the
		// directory under, so there is nothing we can do; previously
		// this path crashed dereferencing new_parent.
		if (new_parent == null) {
			Logger.Log.Warn ("Couldn't find DirectoryModel for directory being renamed to '{0}', and its parent is unknown; ignoring.",
					 new_name);
			return;
		}

		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name, new_parent.FullName);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);
	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
717 //////////////////////////////////////////////////////////////////////////
720 // This code controls the directory crawl order
// Depth-first walk of the tree below (and including) 'contender',
// returning the best crawl candidate seen so far.  A directory that
// needs crawling replaces the current best when there is none yet or
// when the current best compares less than it.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	if (contender.NeedsCrawl
	    && (best == null || best.CompareTo (contender) < 0))
		best = contender;

	foreach (DirectoryModel sub_dir in contender.Children)
		best = StupidWalk (best, sub_dir);

	return best;
}
// Picks the next directory to crawl across all roots, or null if
// nothing needs crawling.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
// Records that a directory has been crawled: stamps its stored
// attributes and its model with the current UTC time and marks the
// model clean.  Does nothing for detached directories or when the
// directory has no stored attributes.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	FileAttributes dir_attr = FileAttributesStore.Read (dir.FullName);

	// Don't mark ourselves; let the crawler redo us
	if (dir_attr == null)
		return;

	// We don't have to be super-careful about this since
	// we only use the FileAttributes mtime on a directory
	// to determine its initial state, not whether or not
	// its index record is up-to-date.
	dir_attr.LastWriteTime = DateTime.UtcNow;

	// ...but we do use this to decide which order directories get
	// crawled in.
	dir.LastCrawlTime = DateTime.UtcNow;

	FileAttributesStore.Write (dir_attr);
	dir.MarkAsClean ();
}
// Flags an attached directory as uncrawlable, dropping its watch
// first if one was set up.  Detached directories are left alone.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (dir.IsAttached) {
		// If we managed to get set up a watch on this
		// directory, drop it.
		if (dir.WatchHandle != null) {
			event_backend.ForgetWatch (dir.WatchHandle);
			dir.WatchHandle = null;
		}

		dir.MarkAsUncrawlable ();
	}
}
// Requests a re-crawl of the given path.  If the path itself has a
// directory model, it and everything below it are reset to
// PossiblyClean and queued; otherwise only its containing directory
// is queued.  If neither has a model, nothing happens.
public void Recrawl (string path)
{
	// Try to find a directory model for the path specified
	// so that we can re-crawl it.
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		// Fall back to the directory containing the path.
		string containing = FileSystem.GetDirectoryNameRootOk (path);
		target = GetDirectoryModelByPath (containing);

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	if (tree_crawl_task.Add (target))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Resets every directory under every root to PossiblyClean, queues
// them all for crawling and makes sure both crawl tasks are active.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root in roots) {
		Recrawl_Recursive (root, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Sets the given state on a directory and hands it to the tree
// crawl task, then does the same for each of its children.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// Hands the file crawl task to the scheduler unless it is already
// active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Hands the tree crawl task to the scheduler unless it is already
// active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
849 //////////////////////////////////////////////////////////////////////////
852 // File-related methods
// What the crawler has to do with a file it encounters, as decided
// by DetermineRequiredAction.
private enum RequiredAction {
	// Leave the file alone.
	None,
	// (Re)index the file.
	Index,
	// Only update the file's name information.
	Rename,
	// Drop the attributes stored for the file.
	Forget
}
// Reference point for time_t values: 1970-01-01 00:00:00.  This is
// never meant to change, so it is readonly.
static readonly DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

// Converts a number of seconds since the epoch (such as the st_mtime
// and st_ctime fields of a stat result) into a DateTime.
static DateTime ToDateTimeUtc (long time_t)
{
	return epoch.AddSeconds (time_t);
}
// Decides what the crawler should do with the file 'name' in 'dir'.
//
// attr:            the attributes previously stored for the file, or
//                  null if there are none.
// last_known_path: set to the path the file's unique id last resolved
//                  to, when that had to be looked up; null otherwise.
//
// Note that attr.UniqueId is replaced with a fresh id when the mtime
// has changed and the id resolves to a different path (i.e. the file
// looks like a copy of an already-indexed file).
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
						string         name,
						FileAttributes attr,
						out string     last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously.  Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take in to account that we might have a better matching filter to use now
	// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	int stat_result;
	try {
		stat_result = Mono.Unix.Native.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug ("Caught exception stat-ing {0}", path);
		Logger.Log.Debug (ex);
		return RequiredAction.None;
	}

	// stat reports failure through its return value rather than by
	// throwing.  On failure the stat structure holds no meaningful
	// times, so comparing against it would wrongly conclude that the
	// file has changed.  Treat it like the exception case above.
	if (stat_result != 0) {
		Logger.Log.Debug ("Unable to stat {0}", path);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = ToDateTimeUtc (stat.st_mtime);
	last_attr_time = ToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

		// If the file has been copied, it will have the
		// original file's EAs.  Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file.  If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved.  We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
// Returns an indexable that will do the right thing with a file the
// crawler came across, or null if there is nothing to submit: either
// no action is required, or the required action (forgetting the
// file's attributes) is carried out right here.
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string full_path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (full_path);

	string last_known_path;
	RequiredAction action = DetermineRequiredAction (dir, name, attr, out last_known_path);

	if (action == RequiredAction.None)
		return null;

	// Read the id only now: deciding on the action may have
	// assigned a new unique id to the attributes.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	if (action == RequiredAction.Index)
		return FileToIndexable (full_path, unique_id, dir, true);

	if (action == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (action == RequiredAction.Forget)
		FileAttributesStore.Drop (full_path);

	return null;
}
// Schedules the file 'name' in 'dir' for immediate indexing.  Files
// that do not exist, special files and ignored files are skipped.
public void AddFile (DirectoryModel dir, string name)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (! File.Exists (full_path))
		return;

	if (FileSystem.IsSpecialFile (full_path))
		return;

	if (filter.Ignore (dir, name, false))
		return;

	// If this file already has extended attributes,
	// make sure that the name matches the file
	// that is in the index.  If not, it could be
	// a copy of an already-indexed file and should
	// be assigned a new unique id.
	Guid file_id = Guid.Empty;
	FileAttributes attr = FileAttributesStore.Read (full_path);
	if (attr != null) {
		LuceneNameResolver.NameInfo indexed;
		indexed = name_resolver.GetNameInfoById (attr.UniqueId);

		bool is_same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (is_same_file)
			file_id = attr.UniqueId;
	}

	if (file_id == Guid.Empty)
		file_id = Guid.NewGuid ();

	RegisterId (name, dir, file_id);

	Indexable file_indexable = FileToIndexable (full_path, file_id, dir, false);
	if (file_indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (file_indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
// Schedules the immediate removal of the file 'name' in 'dir' from
// the index.  If the file's unique id cannot be resolved, this is
// logged and nothing is scheduled.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignore.
	// Right?

	Guid file_id = NameAndParentToId (name, dir);
	if (file_id == Guid.Empty) {
		Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
				 name, dir.FullName);
		return;
	}

	Uri internal_uri = GuidFu.ToUri (file_id);
	Uri external_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);
	removal.LocalState ["RemovedUri"] = external_uri;

	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Handles a file being moved and/or renamed from old_name in old_dir
// to new_name in new_dir.
//
// If both names are ignored by the filter, nothing happens.  If only
// the old name was ignored, the move is treated as an add of the new
// file; if only the new name is ignored, it is treated as a removal
// of the old file.  Otherwise the file's unique id is carried over
// to the new path and an immediate renaming task is scheduled.  If
// the unique id of the old file cannot be resolved, the move is
// treated as an add.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// The index and the id cache only know this file under
		// its old name and parent, so that is what has to be
		// removed.  Looking it up under the new name would fail
		// to resolve the id and leave the stale entry behind.
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an Add.
		AddFile (new_dir, new_name);
		return;
	}

	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we seen
	// events in a directory that has not been fully scanned, just to
	// avoid races.  i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet?  Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1168 //////////////////////////////////////////////////////////////////////////
1170 // Configuration stuff
// Read-only view of the configured roots; exposes the roots_by_path
// collection directly (no copy is made).
public IList Roots {
	get { return roots_by_path; }
}
// Adds every root requested by the indexing configuration (the home
// directory first, when enabled, then each explicitly listed root) and
// subscribes to later changes of the indexing configuration section.
private void LoadConfiguration ()
{
	bool index_home = Conf.Indexing.IndexHomeDir;
	if (index_home) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	// From here on OnConfigurationChanged keeps the roots up to date.
	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration callback: works out which roots the current
// configuration asks for, compares that against the roots we currently
// have, and removes/adds roots accordingly.  The 'section' argument is
// not consulted; the indexing configuration is re-read directly.
private void OnConfigurationChanged (Conf.Section section)
{
	ArrayList desired = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir) {
		desired.Add (PathFinder.HomeDir);
	}

	IList missing;
	IList obsolete;
	ArrayFu.IntersectListChanges (desired, Roots, out missing, out obsolete);

	// Drop the roots that are no longer wanted before adding new ones.
	foreach (string stale_root in obsolete) {
		RemoveRoot (stale_root);
	}

	foreach (string new_root in missing) {
		AddRoot (new_root);
	}
}
1206 //////////////////////////////////////////////////////////////////////////
1209 // Our magic LuceneQueryable hooks
// Always answers false for child indexables.
//
// FIXME: Remapping the Uris of children is tricky, and file: uris that
// contain fragments are awkward to serialize properly.  Until that is
// sorted out we simply drop every child indexable of a file system
// object.
protected override bool PreChildAddHook (Indexable child)
{
	return false;
}
// Called with the indexable and its receipt after an add has gone
// through.  Updates the stored file attributes for the indexed path,
// forgets the in-memory id for that path, and rewrites the receipt's Uri
// to the external file: uri so that change notification works.
protected override void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// A property change only needs its receipt remapped to our *old*
	// external Uri to make notification work out properly.
	if (indexable.Type == IndexableType.PropertyChange) {
		string previous_path = (string) indexable.LocalState ["LastKnownPath"];
		receipt.Uri = UriFu.PathToFileUri (previous_path);
		Logger.Log.Debug ("Last known path is {0}", previous_path);

		// The rename has reached the index, so the uid no longer
		// has to be tracked in memory.
		ForgetId (previous_path);
		return;
	}

	string indexed_path = (string) indexable.LocalState ["Path"];
	ForgetId (indexed_path);

	DirectoryModel parent_dir = indexable.LocalState ["Parent"] as DirectoryModel;

	// The parent directory might have run away since we were indexed
	if (! (parent_dir == null || parent_dir.IsAttached))
		return;

	Guid id = GuidFu.FromUri (receipt.Uri);

	FileAttributes attributes = FileAttributesStore.ReadOrCreate (indexed_path, id);
	attributes.Path = indexed_path;
	attributes.LastWriteTime = indexable.Timestamp;
	attributes.FilterName = receipt.FilterName;
	attributes.FilterVersion = receipt.FilterVersion;

	// Walkable items also have to be registered as directories; if
	// that fails nothing is written and the receipt is left alone.
	bool walkable = indexable.LocalState ["IsWalkable"] != null;
	if (walkable) {
		string dir_name = (string) indexable.LocalState ["Name"];
		if (! RegisterDirectory (dir_name, parent_dir, attributes))
			return;
	}

	FileAttributesStore.Write (attributes);

	// Remap the Uri so that change notification will work properly
	receipt.Uri = UriFu.PathToFileUri (indexed_path);
}
// Called with the indexable and its receipt after a removal.  Swaps the
// receipt's Uri for the external uri cached under "RemovedUri" in the
// indexable's local state (needed for change notification) and forgets
// the id for that path.  Throws if no such uri was cached.
protected override void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	Uri cached_uri = indexable.LocalState ["RemovedUri"] as Uri;

	if (cached_uri != null) {
		receipt.Uri = cached_uri;
		ForgetId (cached_uri.LocalPath);
		return;
	}

	throw new Exception ("No cached external Uri for " + receipt.Uri);
}
// Rewrites hit.Uri from the internal uri to the external file: uri.
// The internal uri is preserved on the hit as the unsearched property
// "beagle:InternalUri".  Returns true if a path could be worked out and
// the hit was remapped, false otherwise.
private bool RemapUri (Hit hit)
{
	// Keep the internal uri around as a property of the hit.
	string serialized_uri = UriFu.UriToSerializableString (hit.Uri);
	hit.AddProperty (Property.NewUnsearched ("beagle:InternalUri", serialized_uri));

	// Work out the path, preferably from the parent and the name.
	string resolved_path;
	string file_name = hit [ExactFilenamePropKey];

	if (file_name != null) {
		string parent_uri_str = hit [ParentDirUriPropKey];
		if (parent_uri_str == null)
			return false;

		Guid parent_guid = GuidFu.FromUriString (parent_uri_str);

		resolved_path = ToFullPath (file_name, parent_guid);
		if (resolved_path == null)
			Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
					  file_name, GuidFu.ToShortString (parent_guid));
	} else {
		// Without the filename property we have to go through the
		// guid.  This happens with synthetic hits produced by index
		// listeners.
		Guid hit_guid = GuidFu.FromUri (hit.Uri);
		resolved_path = UniqueIdToFullPath (hit_guid);
	}

	if (resolved_path == null)
		return false;

	hit.Uri = UriFu.PathToFileUri (resolved_path);
	return true;
}
// Hit filter: this handles our mapping from internal->external uris,
// and checks to see if the file is still there.
//
// Returns true if the hit should be kept: its uri could be remapped,
// the file or directory exists on disk, and the filter does not ignore
// it.  Returns false otherwise.
override protected bool HitFilter (Hit hit)
{
	if (! RemapUri (hit))
		return false;

	string path;
	path = hit.Uri.LocalPath;

	bool is_directory;
	bool exists = false;

	is_directory = hit.MimeType == "inode/directory";

	// A hit without a mime type is treated as a directory if a
	// directory exists at its path.
	if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
		is_directory = true;
		exists = true;
	}

	if (! exists) {
		if (is_directory)
			exists = Directory.Exists (path);
		else
			exists = File.Exists (path);
	}

	// If the file doesn't exist, we do not schedule a removal and
	// return false.  This is to avoid "losing" files if they are
	// in a directory that has been renamed but which we haven't
	// scanned yet... if we dropped them from the index, they would
	// never get re-indexed (or at least not until the next time they
	// were touched) since they would still be stamped with EAs
	// indicating they were up-to-date.  And that would be bad.
	// FIXME: It would be safe if we were in a known state, right?
	// i.e. every DirectoryModel is clean.
	if (! exists)
		return false;

	// Fetch the parent directory model from our cache to do clever
	// filtering to determine if we're ignoring it or not.
	DirectoryModel parent;
	parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

	// Check the ignore status of the hit
	if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
		return false;

	return true;
}
// Produces a snippet for 'hit' matching 'query_terms'.  Returns null
// when the text cache has no entry for the hit's internal uri.
public override string GetSnippet (string [] query_terms, Hit hit)
{
	// Going back to the internal uri is easy: RemapUri-style
	// remapping leaves it behind in a property of the hit.
	string internal_uri_str = hit ["beagle:InternalUri"];
	Uri internal_uri = UriFu.UriStringToUri (internal_uri_str);

	string cached_path = TextCache.UserCache.LookupPathRaw (internal_uri);
	if (cached_path == null)
		return null;

	if (cached_path != TextCache.SELF_CACHE_TAG)
		return SnippetFu.GetSnippetFromTextCache (query_terms, cached_path);

	// Self-cached: read straight from the file behind the remapped Uri.
	return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);
}
// Starts the queryable: runs the base class start-up, starts the file
// event backend with this object as its target, and then loads the
// configured roots.
public override void Start ()
{
	base.Start ();

	// The backend is started before any root is added.
	event_backend.Start (this);
	LoadConfiguration ();

	Logger.Log.Debug ("Done starting FileSystemQueryable");
}
1410 //////////////////////////////////////////////////////////////////////////
1412 // These are the methods that the IFileEventBackend implementations should
1413 // call in response to events.
// Records that some activity was seen in 'directory_name' by stamping
// the matching DirectoryModel with the current time.  Does nothing for
// unknown directories or directories that do not need crawling.
public void ReportEventInDirectory (string directory_name)
{
	DirectoryModel model = GetDirectoryModelByPath (directory_name);

	// Fail silently on unknown directories.  The timestamp is only used
	// to prioritize the order in which directories get crawled, so a
	// directory that doesn't need crawling can safely be skipped too.
	if (model == null || ! model.NeedsCrawl)
		return;

	model.LastActivityTime = DateTime.Now;

	Logger.Log.Debug ("Saw event in '{0}'", directory_name);
}
// Backend entry point for "something was created": dispatches to
// AddDirectory or AddFile for the entry 'file_name' inside
// 'directory_name'.  Logs a warning and does nothing if the containing
// directory has no DirectoryModel.
public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
{
	string kind = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name, kind);

	DirectoryModel parent = GetDirectoryModelByPath (directory_name);
	if (parent != null) {
		if (is_directory) {
			AddDirectory (parent, file_name);
		} else {
			AddFile (parent, file_name);
		}
		return;
	}

	Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
}
// Backend entry point for "something was deleted".  For a directory the
// model of the deleted directory itself is looked up, its watch handle
// is cleared and the directory is removed; for a file the model of the
// containing directory is looked up and the file is removed from it.
// Logs a warning and does nothing if the required model is unknown.
public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
{
	string kind = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name, kind);

	// Directories are looked up by their own path, files by the path
	// of the directory that contains them.
	string model_path = is_directory
		? Path.Combine (directory_name, file_name)
		: directory_name;

	DirectoryModel model = GetDirectoryModelByPath (model_path);
	if (model == null) {
		Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", model_path);
		return;
	}

	if (! is_directory) {
		RemoveFile (model, file_name);
		return;
	}

	model.WatchHandle = null;
	RemoveDirectory (model);
}
// Backend entry point for "something was moved/renamed".
//
// Directories are handed to MoveDirectory as-is.  For files the
// DirectoryModels of both endpoints are looked up first; since either
// lookup can come back empty, the move is degraded gracefully instead of
// dereferencing a missing model:
//   - neither endpoint known: warn and do nothing,
//   - only the source known:  the file left the directories we know
//     about, so it is removed,
//   - only the target known:  the file arrived from somewhere we know
//     nothing about, so it is added,
//   - both known:             a regular MoveFile.
public void HandleMoveEvent (string old_directory_name, string old_file_name,
			     string new_directory_name, string new_file_name,
			     bool is_directory)
{
	Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
			  old_directory_name, old_file_name,
			  new_directory_name, new_file_name,
			  is_directory ? "(dir)" : "(file)");

	if (is_directory) {
		DirectoryModel dir, new_parent;
		dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
		new_parent = GetDirectoryModelByPath (new_directory_name);
		MoveDirectory (dir, new_parent, new_file_name);
		return;
	}

	DirectoryModel old_dir, new_dir;
	old_dir = GetDirectoryModelByPath (old_directory_name);
	new_dir = GetDirectoryModelByPath (new_directory_name);

	if (old_dir == null && new_dir == null) {
		Logger.Log.Warn ("HandleMoveEvent failed: Couldn't find DirectoryModel for '{0}' or '{1}'",
				 old_directory_name, new_directory_name);
		return;
	}

	if (new_dir == null) {
		// Moved out of every directory we have a model for.
		RemoveFile (old_dir, old_file_name);
		return;
	}

	if (old_dir == null) {
		// Moved in from a directory we have no model for, so there
		// is no old entry to rename.
		AddFile (new_dir, new_file_name);
		return;
	}

	MoveFile (old_dir, old_file_name, new_dir, new_file_name);
}
1506 public void HandleOverflowEvent ()
1508 Logger.Log.Debug ("Queue overflows suck");