4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
33 namespace Beagle
.Daemon
{
35 public abstract class LuceneQueryable
: IQueryable
{
37 public delegate IIndexer
IndexerCreator (string name
, int minor_version
);
39 static private IndexerCreator indexer_hook
= null;
41 static public IndexerCreator IndexerHook
{
42 set { indexer_hook = value; }
45 virtual protected IIndexer
LocalIndexerHook ()
50 //////////////////////////////////////////////////////////
52 public delegate void OptimizeAllHandler ();
54 static private OptimizeAllHandler OptimizeAllEvent
;
56 static public void OptimizeAll ()
58 if (OptimizeAllEvent
!= null)
62 //////////////////////////////////////////////////////////
64 private Scheduler scheduler
= Scheduler
.Global
;
65 private FileAttributesStore fa_store
= null;
67 private string index_name
;
68 private int minor_version
;
69 private bool read_only_mode
;
71 private LuceneQueryingDriver driver
;
72 private IIndexer indexer
= null;
73 private LuceneTaskCollector collector
;
75 private LuceneQueryingDriver
.UriFilter our_uri_filter
;
76 private LuceneCommon
.HitFilter our_hit_filter
;
78 //////////////////////////////////////////////////////////
// Convenience constructors. Each one forwards to the three-argument
// constructor (index_name, minor_version, read_only_mode).

// Name only: forwards -1 as the minor version and false for read-only mode.
// NOTE(review): -1 presumably means "no specific minor version" -- confirm
// against LuceneQueryingDriver.
80 public LuceneQueryable (string index_name
) : this (index_name
, -1, false) { }
// Name plus read-only flag: forwards -1 as the minor version.
82 public LuceneQueryable (string index_name
, bool read_only_mode
) : this (index_name
, -1, read_only_mode
) { }
// Name plus minor version: forwards false for read-only mode.
84 public LuceneQueryable (string index_name
, int minor_version
) : this (index_name
, minor_version
, false) { }
// Primary constructor; the other overloads forward here.
// Stores the index name, minor version and read-only flag, builds the
// querying driver, wraps the two filter methods in delegates, obtains an
// indexer, and subscribes this instance to the flush and optimize-all events.
// NOTE(review): BuildLuceneQueryingDriver and LocalIndexerHook are declared
// virtual in this class and are called from the constructor, so an override
// runs before the derived class's own constructor body has executed.
86 public LuceneQueryable (string index_name
, int minor_version
, bool read_only_mode
)
88 this.index_name
= index_name
;
89 this.minor_version
= minor_version
;
90 this.read_only_mode
= read_only_mode
;
// The driver is created through a virtual factory method so that a
// subclass can substitute its own LuceneQueryingDriver.
92 driver
= BuildLuceneQueryingDriver (this.index_name
, this.minor_version
, this.read_only_mode
);
// Delegate wrappers around this instance's Uri and Hit filter methods.
93 our_uri_filter
= new LuceneQueryingDriver
.UriFilter (this.HitIsValidOrElse
);
94 our_hit_filter
= new LuceneCommon
.HitFilter (this.HitFilter
);
96 // If the queryable is in read-only mode, don't
97 // instantiate an indexer for it.
// NOTE(review): the read-only check that the comment above describes is
// not visible in this excerpt.
// Ask the subclass for an indexer first; when it returns null, fall back
// to the static indexer_hook delegate (set through IndexerHook), if any.
101 indexer
= LocalIndexerHook ();
102 if (indexer
== null && indexer_hook
!= null)
103 indexer
= indexer_hook (this.index_name
, this.minor_version
);
// NOTE(review): when neither source yields an indexer, `indexer` is still
// null here; no null check is visible in this excerpt -- confirm.
105 indexer
.FlushEvent
+= OnFlushEvent
;
// OptimizeAllEvent is static, so the event holds a reference to this
// instance for as long as the handler stays subscribed; no unsubscribe is
// visible in this excerpt.
107 OptimizeAllEvent
+= OnOptimizeAllEvent
;
// Task collector bound to the indexer selected above.
109 collector
= new LuceneTaskCollector (indexer
);
// Read-only accessors over the constructor-initialized state.

// The index name that was passed to the constructor.
112 protected string IndexName
{
113 get { return index_name; }
// The querying driver's TopDirectory.
116 protected string IndexDirectory
{
117 get { return driver.TopDirectory; }
// The querying driver's Fingerprint.
120 protected string IndexFingerprint
{
121 get { return driver.Fingerprint; }
// The querying driver built in the constructor.
124 protected LuceneQueryingDriver Driver
{
125 get { return driver; }
// The scheduler used by this queryable; the backing field is initialized
// to Scheduler.Global.
128 public Scheduler ThisScheduler
{
129 get { return scheduler; }
132 /////////////////////////////////////////
134 virtual public void Start ()
139 /////////////////////////////////////////
141 virtual public bool AcceptQuery (Query query
)
146 /////////////////////////////////////////
148 virtual protected bool HitIsValid (Uri uri
)
153 // Schedule all non-valid Uris for removal.
// The constructor wraps this method in a LuceneQueryingDriver.UriFilter
// delegate. It asks HitIsValid about the uri and, when the answer is no and
// this queryable is not read-only, queues a removal task for that uri.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably is_valid is returned -- confirm.
154 private bool HitIsValidOrElse (Uri uri
)
156 bool is_valid
= HitIsValid (uri
);
// A read-only queryable never schedules removals.
158 if (! is_valid
&& ! read_only_mode
) {
160 // FIXME: There is probably a race here --- what if the hit
161 // becomes valid sometime between calling HitIsValid
162 // and the removal task being executed?
164 Scheduler
.Task task
= NewRemoveTask (uri
);
// NOTE(review): DeferToExisting presumably leaves an already-queued task
// for the same uri in place -- confirm against Scheduler.AddType.
165 ThisScheduler
.Add (task
, Scheduler
.AddType
.DeferToExisting
);
171 virtual protected bool HitFilter (Hit hit
)
176 /////////////////////////////////////////
178 virtual protected Hit
PostProcessHit (Hit hit
)
183 /////////////////////////////////////////
185 // DEPRECATED: This does nothing, since everything is now
187 virtual protected double RelevancyMultiplier (Hit hit
)
// Exponential-decay weight for a timestamp: 0.5 raised to the power
// (distance from now in days / half_life_days).
//   dt             - the timestamp to weigh.
//   half_life_days - number of days over which the result halves.
// The result is 1.0 when dt equals the current time and shrinks toward 0
// as the distance grows.
192 static protected double HalfLifeMultiplier (DateTime dt
, int half_life_days
)
// Math.Abs makes the distance symmetric: a timestamp in the future decays
// exactly like one equally far in the past.
// NOTE(review): DateTime.Now is local time, so dt is presumably expected
// to be local time as well -- confirm.
194 double days
= Math
.Abs ((DateTime
.Now
- dt
).TotalDays
);
// The cast forces floating-point division.
// NOTE(review): half_life_days == 0 is not guarded in the visible code;
// the quotient is then Infinity (or NaN when days is also 0).
197 return Math
.Pow (0.5, days
/ (double) half_life_days
);
200 // FIXME: A decaying half-life is a little sketchy, since data
201 // will eventually decay beyond the epsilon and be dropped
202 // from the results entirely, which is almost never what we
203 // want, particularly in searches with a small number of
204 // results. But with a default half-life of 6 months, it'll
205 // take over 13 years to fully decay outside the epsilon on
206 // this multiplier alone.
// Convenience overload: forwards to the two-argument HalfLifeMultiplier
// with a fixed half-life of 182 days.
207 static protected double HalfLifeMultiplier (DateTime time
)
209 // Default relevancy half-life is six months.
210 return HalfLifeMultiplier (time
, 182);
// Derives a half-life multiplier from date-valued properties of a hit.
//   hit                - indexed by each property key to fetch a string value.
//   default_multiplier - assigned to best_m on the fallback path.
//   properties         - candidate property keys; each entry is converted
//                        with "as string".
// NOTE(review): parts of this method are not visible in this excerpt (for
// example the declaration of this_m and the return statement); the notes
// below cover only the statements that are shown.
213 static protected double HalfLifeMultiplierFromProperty (Hit hit
,
214 double default_multiplier
,
215 params object [] properties
)
// NOTE(review): -1.0 is presumably a sentinel meaning "no property has
// produced a multiplier yet" -- confirm.
217 double best_m
= -1.0;
219 foreach (object obj
in properties
) {
// "as" yields null for an entry that is not a string; no null check on key
// is visible before it is used to index hit.
220 string key
= obj
as string;
221 string val
= hit
[key
];
// NOTE(review): no check of val is visible before it is parsed -- confirm
// how a missing property is handled.
223 DateTime dt
= StringFu
.StringToDateTime (val
);
// Same 182-day half-life that the single-argument HalfLifeMultiplier uses.
225 this_m
= HalfLifeMultiplier (dt
, 182); /* 182 days == six months */
// NOTE(review): the condition guarding this fallback is not visible here.
232 best_m
= default_multiplier
;
236 /////////////////////////////////////////
238 // *** FIXME *** FIXME *** FIXME *** FIXME ***
239 // When we rename a directory, we need to somehow
240 // propagate change information to files under that
241 // directory. Example: say that file foo is in
242 // directory bar, and there is an open query that
243 // matches foo. The tile probably says something
244 // like "foo, in folder bar".
245 // Then assume I rename bar to baz. That notification
246 // will go out, so a query matching bar will get
247 // updated... but the query matching foo will not.
248 // What should really happen is that the tile
249 // should change to say "foo, in folder baz".
250 // But making that work will require some hacking
251 // on the QueryResults.
252 // *** FIXME *** FIXME *** FIXME *** FIXME ***
254 private class ChangeData
: IQueryableChangeData
{
256 // These get fed back to LuceneQueryingDriver.DoQuery
257 // as a search subset, and hence need to be internal
258 // Uris when we are remapping.
259 public ICollection AddedUris
;
261 // These get reported directly to clients in
262 // Subtract events, and thus need to be external Uris
263 // when we are remapping.
264 public ICollection RemovedUris
;
// Runs a query through this queryable's driver, optionally narrowed by the
// change data of an index update.
//   query         - forwarded to Driver.DoQuery.
//   query_result  - receives a Subtract call for the removed uris.
//   i_change_data - null, or a ChangeData instance describing what changed.
267 public void DoQuery (Query query
,
268 IQueryResult query_result
,
269 IQueryableChangeData i_change_data
)
// Direct cast: a non-null argument of any type other than ChangeData throws
// InvalidCastException; null passes through as null.
271 ChangeData change_data
= (ChangeData
) i_change_data
;
// Stays null when no change data was supplied.
// NOTE(review): a null subset presumably means "search everything" --
// confirm against LuceneQueryingDriver.DoQuery.
273 ICollection added_uris
= null;
275 if (change_data
!= null) {
// Removed uris are reported to the result before anything is queried.
277 if (change_data
.RemovedUris
!= null)
278 query_result
.Subtract (change_data
.RemovedUris
);
280 // If nothing was added, we can safely return now: this change
281 // cannot have any further effect on an outstanding live query.
// NOTE(review): the early return itself is not visible in this excerpt.
282 if (change_data
.AddedUris
== null
283 || change_data
.AddedUris
.Count
== 0)
286 added_uris
= change_data
.AddedUris
;
// NOTE(review): the remaining arguments of this call are not visible in
// this excerpt.
289 Driver
.DoQuery (query
,
296 /////////////////////////////////////////
298 protected string GetSnippetFromTextCache (string [] query_terms
, Uri uri
)
300 // Look up the hit in our text cache. If it is there,
301 // use the cached version to generate a snippet.
304 reader
= TextCache
.UserCache
.GetReader (uri
);
308 string snippet
= SnippetFu
.GetSnippet (query_terms
, reader
);
314 // When remapping, override this with
315 // return GetSnippetFromTextCache (query_terms, remapping_fn (hit.Uri))
316 virtual public string GetSnippet (string [] query_terms
, Hit hit
)
318 return GetSnippetFromTextCache (query_terms
, hit
.Uri
);
321 /////////////////////////////////////////
// Returns the number of items in the index. The first return asks the
// querying driver, the second asks the indexer.
// NOTE(review): the read-only condition that selects between the two
// returns is not visible in this excerpt.
323 public virtual int GetItemCount ()
325 // If we're in read-only mode, query the driver and
326 // not the indexer for the item count.
328 return driver
.GetItemCount ();
330 return indexer
.GetItemCount ();
333 /////////////////////////////////////////
// Opens a file under <PathFinder.IndexDir>/<IndexName>/ for reading.
//   name - file name, appended to that directory with Path.Combine.
// Returns a FileStream opened with FileMode.Open and FileAccess.Read. The
// stream is returned open; nothing in this method closes it.
335 public FileStream
ReadDataStream (string name
)
// NOTE(review): Path.Combine returns its second argument unchanged when
// that argument is a rooted path, so an absolute `name` would bypass the
// index directory -- confirm callers only pass plain file names.
337 string path
= Path
.Combine (Path
.Combine (PathFinder
.IndexDir
, this.IndexName
), name
);
// NOTE(review): the branch taken when the file does not exist is not
// visible in this excerpt.
339 if (!File
.Exists (path
))
342 return new FileStream (path
, System
.IO
.FileMode
.Open
, FileAccess
.Read
);
345 public string ReadDataLine (string name
)
347 FileStream stream
= ReadDataStream (name
);
352 StreamReader reader
= new StreamReader (stream
);
353 string line
= reader
.ReadLine ();
359 public FileStream
WriteDataStream (string name
)
361 string path
= Path
.Combine (Path
.Combine (PathFinder
.IndexDir
, this.IndexName
), name
);
363 return new FileStream (path
, System
.IO
.FileMode
.Create
, FileAccess
.Write
);
// Writes one line of text to <PathFinder.IndexDir>/<IndexName>/<name>.
//   name - file name, appended to that directory with Path.Combine.
//   line - the text handed to StreamWriter.WriteLine.
368 public void WriteDataLine (string name
, string line
)
371 string path
= Path
.Combine (Path
.Combine (PathFinder
.IndexDir
, this.IndexName
), name
);
// NOTE(review): the statement guarded by this existence check is not
// visible in this excerpt.
373 if (File
.Exists (path
))
// WriteDataStream rebuilds the same path from `name` and opens it with
// FileMode.Create and FileAccess.Write.
379 FileStream stream
= WriteDataStream (name
);
380 StreamWriter writer
= new StreamWriter (stream
);
381 writer
.WriteLine (line
);
// NOTE(review): no Close/Dispose of writer or stream is visible in this
// excerpt -- confirm the line is flushed to disk.
386 //////////////////////////////////////////////////////////////////////////////////
388 private class LuceneTaskCollector
: Scheduler
.ITaskCollector
{
392 public LuceneTaskCollector (IIndexer indexer
)
394 this.indexer
= indexer
;
397 public double GetMinimumWeight ()
402 public double GetMaximumWeight ()
404 // FIXME: this is totally arbitrary
408 public void PreTaskHook ()
413 public void PostTaskHook ()
420 //////////////////////////////////////////////////////////////////////////////////
422 // Adding a single indexable
424 private delegate bool PreAddHookDelegate (Indexable indexable
);
// Scheduler task that adds a single Indexable to the index through an
// IIndexer, subject to an optional pre-add hook.
// NOTE(review): the indexer and indexable field declarations and the
// constructor's indexable parameter are not visible in this excerpt.
426 private class AddTask
: Scheduler
.Task
{
// Optional veto callback consulted in DoTaskReal; may be null.
429 PreAddHookDelegate pre_add_hook
;
431 public AddTask (IIndexer indexer
,
433 PreAddHookDelegate pre_add_hook
)
435 this.indexer
= indexer
;
436 this.indexable
= indexable
;
437 this.pre_add_hook
= pre_add_hook
;
// The task tag is the indexable's display uri rendered as a string.
438 this.Tag
= indexable
.DisplayUri
.ToString ();
442 override protected void DoTaskReal ()
// A null hook means "always add"; otherwise the indexable is added only
// when the hook returns true.
444 if (pre_add_hook
== null || pre_add_hook (indexable
))
445 indexer
.Add (indexable
);
449 virtual protected bool PreAddHook (Indexable indexable
)
454 // If we are remapping Uris, indexables should be added to the
455 // index with the internal Uri attached. Thus the receipt
456 // will come back w/ an internal Uri. In order for change
457 // notification to work correctly, we have to map it to
459 virtual protected void PostAddHook (IndexerAddedReceipt receipt
)
461 // Does nothing by default
464 public Scheduler
.Task
NewAddTask (Indexable indexable
)
467 task
= new AddTask (this.indexer
, indexable
,
468 new PreAddHookDelegate (this.PreAddHook
));
469 task
.Collector
= collector
;
473 //////////////////////////////////////////////////////////////////////////////////
475 // Adding an indexable generator
// Scheduler task that pulls Indexables from an IIndexableGenerator in
// batches of up to hard_wired_generation_count and hands each accepted one
// to the indexer.
// NOTE(review): the indexer field declaration, the declaration of
// `generated`, and several statements of DoTaskReal are not visible in this
// excerpt.
477 private class AddGeneratorTask
: Scheduler
.Task
{
// Source of the Indexables consumed by DoTaskReal.
479 IIndexableGenerator generator
;
// Optional veto callback consulted before each add; may be null.
480 PreAddHookDelegate pre_add_hook
;
482 // Hook to be invoked after the IIndexableGenerator
483 // has finished processing a batch of Indexables,
484 // just prior to flushing the driver.
485 Scheduler
.Hook pre_flush_hook
;
487 // FIXME: number of items generated
488 // from the Indexable shouldn't be
// Upper bound on generator iterations per DoTaskReal run; also used as the
// task weight in the constructor.
490 const int hard_wired_generation_count
= 30;
492 public AddGeneratorTask (IIndexer indexer
,
493 IIndexableGenerator generator
,
494 PreAddHookDelegate pre_add_hook
,
495 Scheduler
.Hook pre_flush_hook
)
497 this.indexer
= indexer
;
498 this.generator
= generator
;
499 this.pre_add_hook
= pre_add_hook
;
500 this.pre_flush_hook
= pre_flush_hook
;
// The tag comes from the generator's StatusName.
501 this.Tag
= generator
.StatusName
;
502 this.Weight
= hard_wired_generation_count
;
505 override protected void DoTaskReal ()
507 // Since this is a generator, we want the task to
508 // get re-scheduled after it is run.
// Becomes true once at least one indexable has been handed to the indexer.
// NOTE(review): how this flag is consumed is not visible in this excerpt.
511 bool did_something
= false;
512 for (int count
= 0; count
< hard_wired_generation_count
; ++count
) {
513 if (! generator
.HasNextIndexable ()) {
514 // ...except if there is no more work to do, of course.
520 generated
= generator
.GetNextIndexable ();
522 // Note that the indexable generator can return null.
523 // This means that the generator didn't have an indexable
524 // to return this time through, but it does not mean that
525 // its processing queue is empty.
526 if (generated
== null)
// A null hook means "always add"; otherwise the indexable is added only
// when the hook returns true.
529 if (pre_add_hook
== null || pre_add_hook (generated
)) {
530 indexer
.Add (generated
);
531 did_something
= true;
// NOTE(review): the hook invocation and whatever follows it are not
// visible in this excerpt.
536 if (pre_flush_hook
!= null)
543 public Scheduler
.Task
NewAddTask (IIndexableGenerator generator
, Scheduler
.Hook pre_flush_hook
)
545 AddGeneratorTask task
;
546 task
= new AddGeneratorTask (this.indexer
,
548 new PreAddHookDelegate (this.PreAddHook
),
551 task
.Priority
= Scheduler
.Priority
.Generator
;
555 public Scheduler
.Task
NewAddTask (IIndexableGenerator generator
)
557 return NewAddTask (generator
, null);
560 //////////////////////////////////////////////////////////////////////////////////
562 // Removing a single item from the index
564 private delegate bool PreRemoveHookDelegate (Uri uri
);
// Scheduler task that removes a single uri from the index through an
// IIndexer, subject to an optional pre-remove hook.
// NOTE(review): the indexer and uri field declarations, the constructor's
// uri parameter and the assignment of the uri field are not visible in this
// excerpt.
566 private class RemoveTask
: Scheduler
.Task
{
// Optional veto callback consulted in DoTaskReal; may be null.
569 PreRemoveHookDelegate pre_remove_hook
;
571 public RemoveTask (IIndexer indexer
,
573 PreRemoveHookDelegate pre_remove_hook
)
575 this.indexer
= indexer
;
577 this.pre_remove_hook
= pre_remove_hook
;
// The task tag is the uri rendered as a string.
579 this.Tag
= uri
.ToString ();
580 this.Weight
= 0.24999; // this is arbitrary
583 override protected void DoTaskReal ()
// A null hook means "always remove"; otherwise the uri is removed only when
// the hook returns true.
585 if (pre_remove_hook
== null || pre_remove_hook (uri
)) {
587 indexer
.Remove (uri
);
592 virtual protected bool PreRemoveHook (Uri uri
)
597 // If we are remapping Uris, receipt.Uri will be passed in as an
598 // internal Uri. It needs to be mapped to an external uri for
599 // change notification to work properly.
600 virtual protected void PostRemoveHook (IndexerRemovedReceipt receipt
)
602 // Does nothing by default
// Creates a RemoveTask for `uri`, bound to this queryable's indexer, its
// PreRemoveHook method and its task collector.
// NOTE(review): the declaration of `task` and the return statement are not
// visible in this excerpt.
605 public Scheduler
.Task
NewRemoveTask (Uri uri
)
// PreRemoveHook is virtual, so the delegate dispatches to a subclass
// override when one exists.
608 task
= new RemoveTask (this.indexer
, uri
,
609 new PreRemoveHookDelegate (this.PreRemoveHook
));
610 task
.Collector
= collector
;
614 //////////////////////////////////////////////////////////////////////////////////
616 // Optimize the index
618 private class OptimizeTask
: Scheduler
.Task
{
621 public OptimizeTask (IIndexer indexer
)
623 this.indexer
= indexer
;
626 override protected void DoTaskReal ()
// Creates an OptimizeTask for this queryable's indexer, tagged
// "Optimize <IndexName>", with Delayed priority and this queryable's task
// collector.
// NOTE(review): the declaration of `task` and the return statement are not
// visible in this excerpt.
632 public Scheduler
.Task
NewOptimizeTask ()
635 task
= new OptimizeTask (this.indexer
);
636 task
.Tag
= "Optimize " + IndexName
;
637 task
.Priority
= Scheduler
.Priority
.Delayed
;
638 task
.Collector
= collector
;
// Handler that the constructor subscribes to the static OptimizeAllEvent:
// creates an optimize task for this queryable and adds it to its scheduler.
// NOTE(review): the declaration of `task` is not visible in this excerpt.
642 private void OnOptimizeAllEvent ()
645 task
= NewOptimizeTask ();
646 ThisScheduler
.Add (task
);
649 //////////////////////////////////////////////////////////////////////////////////
653 // If this returns true, a task will automatically be created to
654 // add the child. Note that the PreAddHook will also be called,
656 virtual protected bool PreChildAddHook (Indexable child
)
661 //////////////////////////////////////////////////////////////////////////////////
// Handler for the indexer's FlushEvent (subscribed in the constructor).
// Walks the receipts, collects added and removed uris, schedules add tasks
// for accepted child indexables, and finally reports the collected changes
// through QueryDriver.QueryableChanged.
//   source   - the indexer that raised the event; not referenced in the
//              visible code.
//   receipts - per-item results of the flush.
// When an attribute store has been created, the receipt loop runs between
// fa_store.BeginTransaction and fa_store.CommitTransaction.
// NOTE(review): the "try {" lines, the hook calls themselves and some
// closing braces are not visible in this excerpt.
663 private void OnFlushEvent (IIndexer source
, IndexerReceipt
[] receipts
)
665 // Just ignore flush-complete notifications
666 // and empty arrays of receipts.
667 if (receipts
== null || receipts
.Length
== 0)
// fa_store is only non-null once the FileAttributesStore property has been
// read, so the transaction is skipped otherwise.
670 if (fa_store
!= null)
671 fa_store
.BeginTransaction ();
// Accumulators for the ChangeData that is built after the loop.
673 ArrayList added_uris
= new ArrayList ();
674 ArrayList removed_uris
= new ArrayList ();
676 for (int i
= 0; i
< receipts
.Length
; ++i
) {
// Case 1: an item was added to the index.
678 if (receipts
[i
] is IndexerAddedReceipt
) {
680 IndexerAddedReceipt r
;
681 r
= (IndexerAddedReceipt
) receipts
[i
];
683 // Add the Uri to the list for our change data
684 // before doing any post-processing.
685 // This ensures that we have internal uris when
687 added_uris
.Add (r
.Uri
);
689 // Call the appropriate hook
691 // Map from internal->external Uris in the PostAddHook
// An exception caught here is logged with the receipt's uri, filter name
// and filter version, and processing continues.
693 } catch (Exception ex
) {
694 Logger
.Log
.Warn ("Caught exception in PostAddHook '{0}' '{1}' '{2}'",
695 r
.Uri
, r
.FilterName
, r
.FilterVersion
);
696 Logger
.Log
.Warn (ex
);
699 // Every added Uri also needs to be listed as removed,
700 // to avoid duplicate hits in the query. Since the
701 // removed Uris need to be external Uris, we add them
702 // to the list *after* post-processing.
703 removed_uris
.Add (r
.Uri
);
// Case 2: an item was removed from the index.
706 } else if (receipts
[i
] is IndexerRemovedReceipt
) {
708 IndexerRemovedReceipt r
;
709 r
= (IndexerRemovedReceipt
) receipts
[i
];
711 // Drop the removed item from the text cache
712 TextCache
.UserCache
.Delete (r
.Uri
);
715 // Call the appropriate hook
// An exception caught here is logged and processing continues.
// NOTE(review): the argument of the first Warn call is not visible in this
// excerpt.
718 } catch (Exception ex
) {
719 Logger
.Log
.Warn ("Caught exception in PostRemoveHook '{0}'",
721 Logger
.Log
.Warn (ex
);
724 // Add the removed Uri to the list for our
725 // change data. This will be an external Uri
726 // when we are remapping.
727 removed_uris
.Add (r
.Uri
);
// Case 3: indexing an item produced child indexables.
729 } else if (receipts
[i
] is IndexerChildIndexablesReceipt
) {
731 IndexerChildIndexablesReceipt r
;
732 r
= (IndexerChildIndexablesReceipt
) receipts
[i
];
734 foreach (Indexable child
in r
.Children
) {
// Stays false when PreChildAddHook throws, so a failing hook never
// schedules the child.
735 bool please_add_a_new_task
= false;
738 please_add_a_new_task
= PreChildAddHook (child
);
739 } catch (InvalidOperationException ex
) {
740 // Queryable does not support adding children
741 } catch (Exception ex
) {
742 Logger
.Log
.Warn ("Caught exception in PreChildAddHook '{0}'", child
.DisplayUri
);
743 Logger
.Log
.Warn (ex
);
// The child goes through the normal add path, so PreAddHook is consulted
// again when the task runs.
746 if (please_add_a_new_task
) {
747 Scheduler
.Task task
= NewAddTask (child
);
748 // FIXME: Probably need a better priority than this
749 task
.Priority
= Scheduler
.Priority
.Generator
;
750 ThisScheduler
.Add (task
);
// NOTE(review): no finally block is visible around the loop, so an
// exception escaping it would skip this commit -- confirm.
756 if (fa_store
!= null)
757 fa_store
.CommitTransaction ();
759 // Propagate the change notification to any open queries.
760 if (added_uris
.Count
> 0 || removed_uris
.Count
> 0) {
761 ChangeData change_data
;
762 change_data
= new ChangeData ();
763 change_data
.AddedUris
= added_uris
;
764 change_data
.RemovedUris
= removed_uris
;
766 QueryDriver
.QueryableChanged (this, change_data
);
770 //////////////////////////////////////////////////////////////////////////////////
773 // It is often convenient to have easy access to a FileAttributeStore
// Chooses the backing implementation for file attributes: the
// extended-attribute store when ExtendedAttribute.Supported is true,
// otherwise the Sqlite store. Both receive IndexFingerprint; the Sqlite
// store additionally receives IndexDirectory.
// Virtual so that a subclass can supply a different implementation.
776 virtual protected IFileAttributesStore
BuildFileAttributesStore ()
778 if (ExtendedAttribute
.Supported
)
779 return new FileAttributesStore_ExtendedAttribute (IndexFingerprint
);
781 return new FileAttributesStore_Sqlite (IndexDirectory
, IndexFingerprint
);
// Creates the FileAttributesStore on first access, wrapping whatever
// BuildFileAttributesStore returns, and caches it in fa_store.
// NOTE(review): the getter keyword and the return statement are not visible
// in this excerpt, and no locking is visible around the null check.
785 public FileAttributesStore FileAttributesStore
{
787 if (fa_store
== null)
788 fa_store
= new FileAttributesStore (BuildFileAttributesStore ());
793 //////////////////////////////////////////////////////////////////////////////////
795 virtual protected LuceneQueryingDriver
BuildLuceneQueryingDriver (string index_name
,
799 return new LuceneQueryingDriver (index_name
, minor_version
, read_only_mode
);