4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
33 namespace Beagle
.Daemon
{
35 public abstract class LuceneQueryable
: IQueryable
{
// Factory signature for producing the IIndexer behind a queryable. The
// constructor invokes it with the index name and the minor version.
public delegate IIndexer IndexerCreator (string name, int minor_version);

// Process-wide indexer factory; remains null until IndexerHook is assigned.
private static IndexerCreator indexer_hook;

// Write-only registration point for the process-wide indexer factory.
public static IndexerCreator IndexerHook {
	set {
		indexer_hook = value;
	}
}
45 virtual protected IIndexer
LocalIndexerHook ()
50 //////////////////////////////////////////////////////////
// Scheduler to which this queryable submits its tasks (see ThisScheduler).
private Scheduler scheduler = Scheduler.Global;

// Values the queryable was constructed with.
private string index_name;
private int minor_version;

// Index back-end objects; all four are assigned by the constructor.
private LuceneDriver driver;
private IIndexer indexer;
private LuceneTaskCollector collector;
private FileAttributesStore fa_store;

// Optional Uri translators installed through SetUriRemappers. A null
// remapper means that direction is left untranslated.
private LuceneDriver.UriRemapper to_internal_uris;
private LuceneDriver.UriRemapper from_internal_uris;
65 //////////////////////////////////////////////////////////
// Per-Uri record of the local path and last-write time observed when an
// indexable was handed to the indexer (filled by CacheIndexableInfo,
// consumed by UseCachedIndexableInfo).
private Hashtable indexable_info_cache = UriFu.NewHashtable ();

// One cache entry. CacheIndexableInfo writes Uri, Path and Mtime and
// UseCachedIndexableInfo reads Path and Mtime, so all three members have
// to be declared here.
private class IndexableInfo {
	// Key under which the entry is stored in indexable_info_cache.
	public Uri Uri;
	// Value of Indexable.ContentUri.LocalPath at caching time
	// (presumably a string path -- confirm against Indexable).
	public string Path;
	// Result of FileSystem.GetLastWriteTime (Path) at caching time.
	public DateTime Mtime;
}
// Record the Uri, local content path and current last-write time of a
// non-transient indexable in indexable_info_cache, keyed by the
// indexable's Uri. Indexables whose IsNonTransient is false are ignored.
internal void CacheIndexableInfo (Indexable indexable)
{
	if (! indexable.IsNonTransient)
		return;

	IndexableInfo entry = new IndexableInfo ();
	entry.Uri = indexable.Uri;
	entry.Path = indexable.ContentUri.LocalPath;
	// The timestamp is sampled now, at caching time.
	entry.Mtime = FileSystem.GetLastWriteTime (entry.Path);

	indexable_info_cache [entry.Uri] = entry;
}
// Attach the timestamp remembered by CacheIndexableInfo for this Uri to
// the file attributes store, then drop the cache entry.
//
// CacheIndexableInfo only stores entries for non-transient indexables, so
// a lookup can legitimately miss. The "as" conversion yields null in that
// case; a miss is ignored rather than dereferenced.
internal void UseCachedIndexableInfo (Uri uri)
{
	IndexableInfo info = indexable_info_cache [uri] as IndexableInfo;
	if (info == null)
		return;

	this.FileAttributesStore.AttachTimestamp (info.Path, info.Mtime);
	indexable_info_cache.Remove (uri);
}
94 //////////////////////////////////////////////////////////
// Forwards to the three-argument constructor with minor_version = -1 and
// disable_locking = false.
public LuceneQueryable (string index_name)
	: this (index_name, -1, false)
{
}

// Forwards to the three-argument constructor with minor_version = -1.
public LuceneQueryable (string index_name, bool disable_locking)
	: this (index_name, -1, disable_locking)
{
}

// Forwards to the three-argument constructor with disable_locking = false.
public LuceneQueryable (string index_name, int minor_version)
	: this (index_name, minor_version, false)
{
}
102 public LuceneQueryable (string index_name
, int minor_version
, bool disable_locking
)
104 this.index_name
= index_name
;
105 this.minor_version
= minor_version
;
107 driver
= new LuceneDriver (this.index_name
, this.minor_version
, disable_locking
);
109 indexer
= LocalIndexerHook ();
110 if (indexer
== null && indexer_hook
!= null)
111 indexer
= indexer_hook (this.index_name
, this.minor_version
);
115 indexer
.ChangedEvent
+= OnIndexerChanged
;
116 indexer
.ChildIndexableEvent
+= OnChildIndexableEvent
;
117 indexer
.UrisFilteredEvent
+= OnUrisFilteredEvent
;
119 fa_store
= new FileAttributesStore (BuildFileAttributesStore (driver
.Fingerprint
));
121 collector
= new LuceneTaskCollector (indexer
);
// Pick the backing store for file attributes: the Sqlite implementation
// (located in IndexDirectory) unless ExtendedAttribute.Supported is true,
// in which case the extended-attribute implementation is used. Virtual so
// that subclasses can substitute their own store.
virtual protected IFileAttributesStore BuildFileAttributesStore (string index_fingerprint)
{
	IFileAttributesStore store;

	if (! ExtendedAttribute.Supported)
		store = new FileAttributesStore_Sqlite (IndexDirectory, index_fingerprint);
	else
		store = new FileAttributesStore_ExtendedAttribute (index_fingerprint);

	return store;
}
// Index name that was passed to the constructor.
protected string IndexName {
	get {
		return this.index_name;
	}
}

// The driver's IndexDirectory.
protected string IndexDirectory {
	get {
		return this.driver.IndexDirectory;
	}
}

// The driver's StorePath.
protected string IndexStoreDirectory {
	get {
		return this.driver.StorePath;
	}
}

// The LuceneDriver created by the constructor.
protected LuceneDriver Driver {
	get {
		return this.driver;
	}
}

// Scheduler that tasks created by this queryable are added to.
public Scheduler ThisScheduler {
	get {
		return this.scheduler;
	}
}

// File attributes store created by the constructor.
public FileAttributesStore FileAttributesStore {
	get {
		return this.fa_store;
	}
}
156 /////////////////////////////////////////
// Install the pair of Uri translators used by this queryable. Either
// argument may be null; callers in this class test each remapper against
// null before invoking it.
public void SetUriRemappers (LuceneDriver.UriRemapper to_internal_uris,
			     LuceneDriver.UriRemapper from_internal_uris)
{
	this.from_internal_uris = from_internal_uris;
	this.to_internal_uris = to_internal_uris;
}
165 /////////////////////////////////////////
167 protected virtual void AbusiveAddHook (Uri uri
)
172 protected virtual void AbusiveRemoveHook (Uri internal_uri
, Uri external_uri
)
177 protected virtual void AbusiveRenameHook (Uri old_uri
, Uri new_uri
)
182 protected virtual void AbusiveChildIndexableHook (Indexable child_indexable
)
187 protected virtual void AbusiveUriFilteredHook (FilteredStatus uri_filtered
)
192 /////////////////////////////////////////
194 // *** FIXME *** FIXME *** FIXME *** FIXME ***
195 // When we rename a directory, we need to somehow
196 // propagate change information to files under that
197 // directory. Example: say that file foo is in
198 // directory bar, and there is an open query that
199 // matches foo. The tile probably says something
200 // like "foo, in folder bar".
201 // Then assume I rename bar to baz. That notification
202 // will go out, so a query matching bar will get
203 // updated... but the query matching foo will not.
204 // What should really happen is that the tile
205 // should change to say "foo, in folder baz".
206 // But making that work will require some hacking
207 // on the QueryResults.
208 // *** FIXME *** FIXME *** FIXME *** FIXME ***
// Change notification payload: built in OnIndexerChanged, passed to
// QueryDriver.QueryableChanged, and cast back from IQueryableChangeData
// in DoQuery.
private class ChangeData : IQueryableChangeData {
	// Uris reported as added by the change.
	public ICollection AddedUris;

	// Uris reported as removed by the change.
	public ICollection RemovedUris;
}
215 private void OnIndexerChanged (IIndexer source
,
216 ICollection list_of_added_uris
,
217 ICollection list_of_removed_uris
,
218 ICollection list_of_renamed_uris
)
220 // If we have renamed uris, synthesize some appropriate change data.
222 // Right now we assume that there will never be adds/removes
223 // and renames in the same event. That is true now, but could
224 // change in the future.
225 if (list_of_renamed_uris
!= null && list_of_renamed_uris
.Count
> 0) {
227 IEnumerator x
= list_of_renamed_uris
.GetEnumerator ();
229 while (x
.MoveNext ()) {
230 Uri old_uri
= x
.Current
as Uri
;
231 if (from_internal_uris
!= null)
232 old_uri
= from_internal_uris (old_uri
);
234 Uri new_uri
= x
.Current
as Uri
;
237 AbusiveRenameHook (old_uri
, new_uri
);
238 } catch (Exception ex
) {
239 Logger
.Log
.Warn ("*** Caught exception in AbusiveRenameHook '{0}' => '{1}'",
241 Logger
.Log
.Warn (ex
);
244 Logger
.Log
.Debug ("*** Faking change data {0} => {1}", old_uri
, new_uri
);
246 ChangeData fake_change_data
= new ChangeData ();
247 fake_change_data
.AddedUris
= new Uri
[1] { new_uri }
;
248 fake_change_data
.RemovedUris
= new Uri
[1] { old_uri }
;
249 QueryDriver
.QueryableChanged (this, fake_change_data
);
256 // Walk across the list of removed Uris and drop them
257 // from the text cache.
258 foreach (Uri uri
in list_of_removed_uris
)
259 TextCache
.Delete (uri
);
261 // Walk across the list of added Uris and mark the local
262 // files with the cached timestamp.
263 foreach (Uri uri
in list_of_added_uris
) {
264 UseCachedIndexableInfo (uri
);
266 AbusiveAddHook (uri
);
267 } catch (Exception ex
) {
268 Logger
.Log
.Warn ("Caught exception in AbusiveAddHook '{0}'", uri
);
269 Logger
.Log
.Warn (ex
);
273 // Propagate the event up through the Queryable.
274 ChangeData change_data
= new ChangeData ();
276 // Keep a copy of the original list of Uris to remove
277 ICollection original_list_of_removed_uris
= list_of_removed_uris
;
279 // If necessary, remap Uris
280 if (from_internal_uris
!= null) {
281 Uri
[] remapped_adds
= new Uri
[list_of_added_uris
.Count
];
282 Uri
[] remapped_removes
= new Uri
[list_of_removed_uris
.Count
];
285 foreach (Uri uri
in list_of_added_uris
)
286 remapped_adds
[i
++] = from_internal_uris (uri
);
288 foreach (Uri uri
in list_of_removed_uris
)
289 remapped_removes
[i
++] = from_internal_uris (uri
);
291 list_of_added_uris
= remapped_adds
;
292 list_of_removed_uris
= remapped_removes
;
295 change_data
.AddedUris
= list_of_added_uris
;
296 change_data
.RemovedUris
= list_of_removed_uris
;
298 // We want to make sure all of our remappings are done
299 // before calling this hook, since it can (and should)
300 // break the link between uids and paths.
301 IEnumerator internal_enumerator
= original_list_of_removed_uris
.GetEnumerator ();
302 IEnumerator external_enumerator
= list_of_removed_uris
.GetEnumerator ();
303 while (internal_enumerator
.MoveNext () && external_enumerator
.MoveNext ()) {
304 Uri internal_uri
= internal_enumerator
.Current
as Uri
;
305 Uri external_uri
= external_enumerator
.Current
as Uri
;
307 AbusiveRemoveHook (internal_uri
, external_uri
);
308 } catch (Exception ex
) {
309 Logger
.Log
.Warn ("Caught exception in AbusiveRemoveHook '{0}' '{1}'",
310 internal_uri
, external_uri
);
311 Logger
.Log
.Warn (ex
);
315 QueryDriver
.QueryableChanged (this, change_data
);
318 /////////////////////////////////////////
// Handler for the indexer's ChildIndexableEvent. Each child indexable is
// first shown to AbusiveChildIndexableHook and then scheduled for
// indexing. A failure on one child is contained so the remaining
// children are still processed.
private void OnChildIndexableEvent (Indexable[] child_indexables)
{
	foreach (Indexable i in child_indexables) {
		try {
			AbusiveChildIndexableHook (i);

			Scheduler.Task task = NewAddTask (i);
			// FIXME: Probably need a better priority than this
			task.Priority = Scheduler.Priority.Generator;
			ThisScheduler.Add (task);
		} catch (InvalidOperationException) {
			// Queryable does not support adding children.
			// The exception object is never read, so no variable
			// is declared for it (avoids compiler warning CS0168).
		} catch (Exception ex) {
			Logger.Log.Warn ("Caught exception in AbusiveChildIndexableHook '{0}'", i.Uri);
			Logger.Log.Warn (ex);
		}
	}
}
// Handler for the indexer's UrisFilteredEvent. Every filtered status is
// passed to AbusiveUriFilteredHook; an exception thrown by the hook is
// logged and the loop moves on to the next status.
public void OnUrisFilteredEvent (FilteredStatus[] uris_filtered)
{
	for (int n = 0; n < uris_filtered.Length; ++n) {
		FilteredStatus status = uris_filtered [n];

		try {
			AbusiveUriFilteredHook (status);
		} catch (Exception ex) {
			Logger.Log.Warn ("Caught exception in AbusiveUriFilteredHook '{0}'", status.Uri);
			Logger.Log.Warn (ex);
		}
	}
}
351 /////////////////////////////////////////
353 virtual public void Start ()
358 /////////////////////////////////////////
360 virtual public bool AcceptQuery (Query query
)
365 /////////////////////////////////////////
367 virtual protected bool HitIsValid (Uri uri
)
372 // Schedule all non-valid Uris for removal.
373 private bool HitIsValidOrElse (Uri uri
)
375 bool is_valid
= HitIsValid (uri
);
379 // FIXME: There is probably a race here --- what if the hit
380 // becomes valid sometime between calling HitIsValid
381 // and the removal task being executed?
383 Scheduler
.Task task
= NewRemoveTask_InternalUri (uri
);
384 ThisScheduler
.Add (task
, Scheduler
.AddType
.DeferToExisting
);
390 /////////////////////////////////////////
392 virtual protected double RelevancyMultiplier (Hit hit
)
397 static protected double HalfLifeMultiplier (DateTime dt
, int half_life_days
)
399 double days
= Math
.Abs ((DateTime
.Now
- dt
).TotalDays
);
402 return Math
.Pow (0.5, days
/ (double) half_life_days
);
405 // FIXME: A decaying half-life is a little sketchy, since data
406 // will eventually decay beyond the epsilon and be dropped
407 // from the results entirely, which is almost never what we
408 // want, particularly in searches with a small number of
409 // results. But with a default half-life of 6 months, it'll
410 // take over 13 years to fully decay outside the epsilon on
411 // this multiplier alone.
// Convenience overload: relevancy multiplier for `time` computed with the
// default half-life.
static protected double HalfLifeMultiplier (DateTime time)
{
	// Default relevancy half-life is six months, expressed in days.
	const int default_half_life_days = 182;

	return HalfLifeMultiplier (time, default_half_life_days);
}
418 static protected double HalfLifeMultiplierFromProperty (Hit hit
,
419 double default_multiplier
,
420 params object [] properties
)
422 double best_m
= -1.0;
424 foreach (object obj
in properties
) {
425 string key
= obj
as string;
426 string val
= hit
[key
];
428 DateTime dt
= StringFu
.StringToDateTime (val
);
430 this_m
= HalfLifeMultiplier (dt
, 182); /* 182 days == six months */
437 best_m
= default_multiplier
;
441 /////////////////////////////////////////
443 protected virtual ICollection
DoBonusQuery (Query query
, ICollection list_of_uris
)
448 public void DoQuery (Query query
,
449 IQueryResult query_result
,
450 IQueryableChangeData i_change_data
)
452 ChangeData change_data
= (ChangeData
) i_change_data
;
454 ICollection added_uris
= null;
455 ICollection extra_uris
= null;
457 if (change_data
!= null) {
459 if (change_data
.RemovedUris
!= null) {
460 foreach (Uri uri
in change_data
.RemovedUris
) {
461 Logger
.Log
.Debug ("**** Removing {0}", uri
);
462 query_result
.Subtract (uri
);
466 // If nothing was added, we can safely return now: this change
467 // cannot have any further effect on an outstanding live query.
468 if (change_data
.AddedUris
== null
469 || change_data
.AddedUris
.Count
== 0)
472 // Remove any added URIs from the result, so that we properly
473 // handle updates and don't get duplicate results in clients.
474 foreach (Uri uri
in change_data
.AddedUris
)
475 query_result
.Subtract (uri
);
477 if (to_internal_uris
!= null) {
478 Uri
[] remapped_uris
= new Uri
[change_data
.AddedUris
.Count
];
480 foreach (Uri uri
in change_data
.AddedUris
) {
481 Uri new_uri
= to_internal_uris (uri
);
482 remapped_uris
[i
++] = new_uri
;
483 Logger
.Log
.Debug ("*** Remapped {0} => {1}", uri
, new_uri
);
485 added_uris
= remapped_uris
;
487 added_uris
= change_data
.AddedUris
;
491 extra_uris
= DoBonusQuery (query
, added_uris
);
493 Driver
.DoQuery (query
,
497 new LuceneDriver
.UriFilter (HitIsValidOrElse
),
499 new LuceneDriver
.RelevancyMultiplier (RelevancyMultiplier
));
502 /////////////////////////////////////////
504 public virtual string GetSnippet (string[] query_terms
, Hit hit
)
506 // Look up the hit in our text cache. If it is there,
507 // use the cached version to generate a snippet.
510 if (to_internal_uris
!= null)
511 uri
= to_internal_uris (uri
);
513 TextReader reader
= TextCache
.GetReader (uri
, from_internal_uris
);
517 string snippet
= SnippetFu
.GetSnippet (query_terms
, reader
);
523 /////////////////////////////////////////
// Returns the item count reported by the Lucene driver.
public virtual int GetItemCount ()
{
	int item_count = driver.GetItemCount ();
	return item_count;
}
530 /////////////////////////////////////////
532 public FileStream
ReadDataStream (string name
)
534 string path
= Path
.Combine (Path
.Combine (PathFinder
.StorageDir
, this.IndexName
), name
);
536 if (!File
.Exists (path
))
539 return new FileStream (path
, System
.IO
.FileMode
.Open
, FileAccess
.Read
);
542 public string ReadDataLine (string name
)
544 FileStream stream
= ReadDataStream (name
);
549 StreamReader reader
= new StreamReader (stream
);
550 string line
= reader
.ReadLine ();
// Open the data file `name`, located in the IndexName sub-directory of
// PathFinder.StorageDir, for writing with FileMode.Create. The stream is
// returned open; closing it is left to the caller.
public FileStream WriteDataStream (string name)
{
	string index_dir = Path.Combine (PathFinder.StorageDir, this.IndexName);
	string data_file = Path.Combine (index_dir, name);

	return new FileStream (data_file, System.IO.FileMode.Create, FileAccess.Write);
}
565 public void WriteDataLine (string name
, string line
)
568 string path
= Path
.Combine (Path
.Combine (PathFinder
.StorageDir
, this.IndexName
), name
);
570 if (File
.Exists (path
))
576 FileStream stream
= WriteDataStream (name
);
577 StreamWriter writer
= new StreamWriter (stream
);
578 writer
.WriteLine (line
);
583 //////////////////////////////////////////////////////////////////////////////////
586 // The types involved here are defined below
// Create a task that adds a single Indexable through this queryable's
// indexer. The task is attached to the queryable's shared collector.
public Scheduler.Task NewAddTask (Indexable indexable)
{
	LuceneTask add_task = new LuceneTask (this, this.indexer, indexable);
	add_task.Collector = collector;
	return add_task;
}
// Create a task that pulls Indexables from a generator and adds them
// through this queryable's indexer. The task runs at Generator priority
// and carries generator_hook (which may be null) as its GeneratorHook.
// No collector is assigned to this task.
public Scheduler.Task NewAddTask (IIndexableGenerator generator, Scheduler.Hook generator_hook)
{
	LuceneTask generator_task = new LuceneTask (this, this.indexer, generator);
	generator_task.Priority = Scheduler.Priority.Generator;
	generator_task.GeneratorHook = generator_hook;
	return generator_task;
}
// Generator add task without a generator hook.
public Scheduler.Task NewAddTask (IIndexableGenerator generator)
{
	Scheduler.Hook no_hook = null;
	return this.NewAddTask (generator, no_hook);
}
// Create a task that removes `uri` from the index. The task is given the
// to_internal_uris remapper, so the Uri is translated (when a remapper is
// installed) at the time the task runs. Attached to the shared collector.
public Scheduler.Task NewRemoveTask (Uri uri) // This should be an external Uri
{
	LuceneTask remove_task = new LuceneTask (this, this.indexer, uri, to_internal_uris);
	remove_task.Collector = collector;
	return remove_task;
}
// Create a task that removes `uri` from the index without any remapping
// (a null remapper is passed). Attached to the shared collector.
public Scheduler.Task NewRemoveTask_InternalUri (Uri uri) // This should be an internal Uri
{
	LuceneTask remove_task = new LuceneTask (this, this.indexer, uri, null);
	remove_task.Collector = collector;
	return remove_task;
}
627 // old_uri should be an internal Uri
628 // new_uri should be an external Uri
// Create a task that renames old_uri to new_uri in the index.
// NOTE(review): old_uri is paired with the to_internal_uris remapper and
// new_uri with none -- confirm this matches the intended internal/external
// convention for the two arguments.
public Scheduler.Task NewRenameTask (Uri old_uri, Uri new_uri)
{
	LuceneTask rename_task = new LuceneTask (this, this.indexer, old_uri, to_internal_uris, new_uri, null);

	// To avoid grouping with anything else, we create our own collector
	rename_task.Collector = new LuceneTaskCollector (indexer);

	return rename_task;
}
// Wrap a Scheduler.TaskHook in a task via Scheduler.TaskFromHook and
// attach the task to this queryable's shared collector.
public Scheduler.Task NewTaskFromHook (Scheduler.TaskHook hook)
{
	Scheduler.Task hook_task = Scheduler.TaskFromHook (hook);
	hook_task.Collector = collector;
	return hook_task;
}
647 //////////////////////////////////////////////////////////////////////////////////
649 private class LuceneTaskCollector
: Scheduler
.ITaskCollector
{
653 public LuceneTaskCollector (IIndexer indexer
)
655 this.indexer
= indexer
;
658 public double GetMinimumWeight ()
663 public double GetMaximumWeight ()
665 // FIXME: this is totally arbitrary
669 public void PreTaskHook ()
674 public void PostTaskHook ()
681 //////////////////////////////////////////////////////////////////////////////////
683 private class LuceneTask
: Scheduler
.Task
{
685 LuceneQueryable queryable
;
689 // If non-null, add this Indexable
690 Indexable indexable
= null;
692 // If uri != null && other_uri == null, remove uri
693 // If both are non-null, rename uri => other_uri
695 Uri other_uri
= null;
697 LuceneDriver
.UriRemapper uri_remapper
= null;
698 LuceneDriver
.UriRemapper other_uri_remapper
= null;
700 // If non-null, add this IIndexableGenerator
701 IIndexableGenerator generator
= null;
703 // FIXME: number of items generated
704 // from the Indexable shouldn't be
706 const int hard_wired_generation_count
= 30;
708 // Hook to be invoked after the IIndexableGenerator
709 // has finished processing a batch of Indexables,
710 // just prior to flushing the driver.
711 public Scheduler
.Hook GeneratorHook
;
713 public LuceneTask (LuceneQueryable queryable
, IIndexer indexer
, Indexable indexable
) // Add
715 this.queryable
= queryable
;
716 this.indexer
= indexer
;
717 this.indexable
= indexable
;
719 this.Tag
= indexable
.DisplayUri
.ToString ();
723 public LuceneTask (LuceneQueryable queryable
, IIndexer indexer
,
724 Uri uri
, LuceneDriver
.UriRemapper remapper
) // Remove
726 this.queryable
= queryable
;
727 this.indexer
= indexer
;
729 this.uri_remapper
= remapper
;
731 this.Tag
= uri
.ToString ();
732 this.Weight
= 0.499999;
735 public LuceneTask (LuceneQueryable queryable
, IIndexer indexer
,
736 Uri old_uri
, LuceneDriver
.UriRemapper old_remapper
,
737 Uri new_uri
, LuceneDriver
.UriRemapper new_remapper
) // Rename
739 this.queryable
= queryable
;
740 this.indexer
= indexer
;
743 this.other_uri
= new_uri
;
745 this.uri_remapper
= old_remapper
;
746 this.other_uri_remapper
= new_remapper
;
748 this.Tag
= String
.Format ("{0} => {1}", old_uri
, new_uri
);
749 this.Weight
= 0.1; // In theory renames are light-weight
752 public LuceneTask (LuceneQueryable queryable
, IIndexer indexer
, IIndexableGenerator generator
) // Add Many
754 this.queryable
= queryable
;
755 this.indexer
= indexer
;
756 this.generator
= generator
;
758 this.Tag
= generator
.StatusName
;
759 this.Weight
= hard_wired_generation_count
;
762 protected override void DoTaskReal ()
764 // Remap Uris as necessary
765 if (uri
!= null && uri_remapper
!= null)
766 uri
= uri_remapper (uri
);
767 if (other_uri
!= null && other_uri_remapper
!= null)
768 other_uri
= other_uri_remapper (other_uri
);
770 if (indexable
!= null) {
771 if (! (indexable
.Uri
.IsFile
772 && queryable
.FileAttributesStore
.IsUpToDate (indexable
.Uri
.LocalPath
))) {
773 queryable
.CacheIndexableInfo (indexable
);
774 indexer
.Add (indexable
);
776 } else if (uri
!= null && other_uri
!= null) {
777 indexer
.Rename (uri
, other_uri
);
778 } else if (uri
!= null) {
779 indexer
.Remove (uri
);
780 } else if (generator
!= null) {
782 // Since this is a generator, we want the task to
783 // get re-scheduled after it is run.
787 for (count
= 0; count
< hard_wired_generation_count
; ++count
) {
788 if (!generator
.HasNextIndexable ()) {
789 // ...except if there is no more work to do, of course.
794 Indexable generated
= generator
.GetNextIndexable ();
796 // Note that the indexable generator can return null.
797 // This means that the generator didn't have an indexable
798 // to return this time through, but it does not mean that
799 // its processing queue is empty.
800 // FIXME: Shouldn't we just break if generated is null?
801 // Right now we just call GetNextIndexable a bunch of times
802 // when we don't have more work to do.
803 if (generated
!= null) {
804 queryable
.CacheIndexableInfo (generated
);
805 indexer
.Add (generated
);
809 if (count
> 0 && this.GeneratorHook
!= null)
810 this.GeneratorHook ();
817 //////////////////////////////////////////////////////////////////////////////////
819 private class MarkingClosure
{
820 FileAttributesStore fa_store
;
824 public MarkingClosure (FileAttributesStore fa_store
,
828 this.fa_store
= fa_store
;
835 fa_store
.AttachTimestamp (path
, mtime
);
// Create a task group named "mark <path>" whose last argument is a hook
// bound to MarkingClosure.Mark for the given path and mtime. The middle
// argument to NewTaskGroup is null (presumably the pre-hook -- confirm
// against Scheduler.NewTaskGroup).
protected Scheduler.TaskGroup NewMarkingTaskGroup (string path, DateTime mtime)
{
	MarkingClosure closure = new MarkingClosure (FileAttributesStore, path, mtime);
	Scheduler.Hook after_group = new Scheduler.Hook (closure.Mark);
	string group_name = "mark " + path;

	return Scheduler.NewTaskGroup (group_name, null, after_group);
}