4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
33 namespace Beagle
.Daemon
{
35 public abstract class LuceneQueryable
: BackendBase
{
// When true, ScheduleOptimize replaces its computed delay with 1/120 of a
// minute (half a second).
37 static public bool OptimizeRightAway
= false;
// Factory signature: produces an IIndexer for a source name and source
// version.
39 public delegate IIndexer
IndexerCreator (string source_name
, int source_version
);
// Factory installed through IndexerHook.  The four-argument constructor
// calls it, when it is non-null, to obtain the indexer for a queryable.
41 static private IndexerCreator indexer_hook
= null;
// Installs the indexer factory.  Only a setter is visible in this extract.
43 static public IndexerCreator IndexerHook
{
44 set { indexer_hook = value; }
47 //////////////////////////////////////////////////////////
// Handler signature for the process-wide "optimize everything" event.
49 public delegate void OptimizeAllHandler ();
// Static event field; every LuceneQueryable subscribes its own
// OnOptimizeAllEvent to it in the four-argument constructor.
51 static private OptimizeAllHandler OptimizeAllEvent
;
// Entry point for requesting an optimize of all queryables.  It checks that
// OptimizeAllEvent has subscribers; the statement guarded by that check is
// not visible in this extract (presumably it raises the event -- confirm
// against the full source).
53 static public void OptimizeAll ()
55 if (OptimizeAllEvent
!= null)
59 //////////////////////////////////////////////////////////
61 private Scheduler scheduler
= Scheduler
.Global
;
62 private FileAttributesStore fa_store
= null;
64 private string source_name
;
66 private LuceneQueryingDriver driver
;
67 private IIndexer indexer
= null;
69 private Scheduler
.Task our_final_flush_task
= null;
70 private Scheduler
.Task our_optimize_task
= null;
72 private object request_lock
= new object ();
73 private IndexerRequest pending_request
= new IndexerRequest ();
75 //////////////////////////////////////////////////////////
// Convenience overload: source_version = -1, read_only_mode = false.
77 public LuceneQueryable (string source_name
) : this (source_name
, -1, false) { }
// Convenience overload: source_version = -1.
79 public LuceneQueryable (string source_name
, bool read_only_mode
) : this (source_name
, -1, read_only_mode
) { }
// Convenience overload: read_only_mode = false.
81 public LuceneQueryable (string source_name
, int source_version
) : this (source_name
, source_version
, false) { }
// Main constructor; all other overloads chain to it.
//   source_name    - stored in this.source_name, and passed to the driver
//                    builder, the hit-filter registration and the indexer
//                    factory.
//   source_version - forwarded to the driver builder and indexer factory.
//   read_only_mode - forwarded to the driver builder.
// Throws Exception ("No indexer available for source ...").  The branch
// structure around that throw is not visible in this extract.
83 public LuceneQueryable (string source_name
, int source_version
, bool read_only_mode
)
85 this.source_name
= source_name
;
// NOTE(review): BuildLuceneQueryingDriver is declared virtual, so this is
// a virtual call made from a constructor.
87 driver
= BuildLuceneQueryingDriver (source_name
, source_version
, read_only_mode
);
// Register this instance's HitFilter with the driver under source_name.
88 driver
.RegisterHitFilter (source_name
, this.HitFilter
);
90 // If the queryable is in read-only mode, don't
91 // instantiate an indexer for it.
// The read-only test itself is not visible in this extract; only the
// indexer_hook check below remains.
95 if (indexer_hook
!= null)
96 indexer
= indexer_hook (source_name
, source_version
);
// Presumably the fallback when no indexer factory has been installed --
// the connecting "else" is not visible here.
99 throw new Exception ("No indexer available for source " + source_name
);
// Subscribe this instance to the static OptimizeAllEvent.
101 OptimizeAllEvent
+= OnOptimizeAllEvent
;
103 // Schedule an optimize, just in case
// (The scheduling statement this comment refers to is not visible here.)
// Subscribe OnShutdownEvent to the process shutdown event.
106 Shutdown
.ShutdownEvent
+= new Shutdown
.ShutdownHandler (OnShutdownEvent
);
109 public override string Name
{
111 if (value != source_name
)
112 throw new Exception (String
.Format ("Backend name (from BackendFlavor) '{0}' does not match source name (from LuceneQueryable ctor) '{1}'", value, source_name
));
118 protected string IndexDirectory
{
119 get { return driver.TopDirectory; }
122 protected string IndexFingerprint
{
123 get { return driver.Fingerprint; }
126 protected string SourceDataDir
{
127 get { return Path.Combine (IndexDirectory, this.source_name); }
130 protected LuceneQueryingDriver Driver
{
131 get { return driver; }
134 public Scheduler ThisScheduler
{
135 get { return scheduler; }
138 /////////////////////////////////////////
140 public override void Start ()
145 public override IQueryable Queryable
{
146 get { return driver; }
149 /////////////////////////////////////////
151 virtual protected void ShutdownHook ()
156 private void OnShutdownEvent ()
159 pending_request
.Cleanup ();
163 } catch (Exception ex
) {
164 Logger
.Log
.Warn (ex
, "Caught exception in shutdown hook");
168 /////////////////////////////////////////
170 virtual protected bool HitFilter (Hit hit
)
175 /////////////////////////////////////////
177 // DEPRECATED: This does nothing, since everything is now
179 virtual protected double RelevancyMultiplier (Hit hit
)
// Exponential-decay relevancy multiplier.
//   dt             - timestamp of the item.
//   half_life_days - number of days after which the multiplier halves.
// The visible code returns 0.5 ^ (days / half_life_days), where days is the
// absolute distance between dt and DateTime.Now, so past and future
// timestamps are treated alike.
// NOTE(review): original lines between the two statements are missing from
// this extract; there may be an extra guard there.
184 static protected double HalfLifeMultiplier (DateTime dt
, int half_life_days
)
// Absolute age in (fractional) days.
186 double days
= Math
.Abs ((DateTime
.Now
- dt
).TotalDays
);
189 return Math
.Pow (0.5, days
/ (double) half_life_days
);
192 // FIXME: A decaying half-life is a little sketchy, since data
193 // will eventually decay beyond the epsilon and be dropped
194 // from the results entirely, which is almost never what we
195 // want, particularly in searches with a small number of
196 // results. But with a default half-life of 6 months, it'll
197 // take over 13 years to fully decay outside the epsilon on
198 // this multiplier alone.
// Overload using the default half-life: delegates to
// HalfLifeMultiplier (time, 182).
199 static protected double HalfLifeMultiplier (DateTime time
)
201 // Default relevancy half-life is six months.
202 return HalfLifeMultiplier (time
, 182);
// Derives a half-life multiplier from date-valued properties of a hit.
//   hit                - the hit whose properties are read via hit [key].
//   default_multiplier - assigned to best_m in a branch whose condition is
//                        not visible here (presumably the fallback when no
//                        property produced a value -- confirm).
//   properties         - property names; each element is converted with
//                        "as string".
// NOTE(review): large parts of this method (the declaration of this_m, the
// comparisons against best_m, and the return) are missing from this
// extract, so the selection rule is not documented here.
205 static protected double HalfLifeMultiplierFromProperty (Hit hit
,
206 double default_multiplier
,
207 params object [] properties
)
// Starts at -1.0.
209 double best_m
= -1.0;
211 foreach (object obj
in properties
) {
// NOTE(review): "as string" yields null for a non-string element; no null
// check is visible before key is used to index the hit.
212 string key
= obj
as string;
213 string val
= hit
[key
];
// Parse the property value as a timestamp.
215 DateTime dt
= StringFu
.StringToDateTime (val
);
217 this_m
= HalfLifeMultiplier (dt
, 182); /* 182 days == six months */
224 best_m
= default_multiplier
;
228 /////////////////////////////////////////
230 protected string GetSnippetFromTextCache (string [] query_terms
, Uri uri
)
232 // Look up the hit in our text cache. If it is there,
233 // use the cached version to generate a snippet.
236 reader
= TextCache
.UserCache
.GetReader (uri
);
240 string snippet
= SnippetFu
.GetSnippet (query_terms
, reader
);
246 // When remapping, override this with
247 // return GetSnippetFromTextCache (query_terms, remapping_fn (hit.Uri))
248 public override string GetSnippet (string [] query_terms
, Hit hit
)
250 return GetSnippetFromTextCache (query_terms
, hit
.Uri
);
253 /////////////////////////////////////////
255 private int progress_percent
= -1;
256 private QueryableState state
= QueryableState
.Idle
;
257 private DateTime last_state_change
= DateTime
.MinValue
;
// Builds a QueryableStatus snapshot for this backend: name, state,
// progress percentage, item count and an IsIndexing flag.
// NOTE(review): the conditional that chooses between the two ItemCount
// assignments, and the return statement, are not visible in this extract.
259 public override QueryableStatus
GetBackendStatus ()
261 QueryableStatus status
= new QueryableStatus ();
// Copy the current name, state and progress fields into the snapshot.
263 status
.Name
= this.Name
;
264 status
.State
= state
;
265 status
.ProgressPercent
= progress_percent
;
267 // If we're in read-only mode, query the driver
268 // and not the indexer for the item count.
270 status
.ItemCount
= driver
.GetItemCount ();
// Otherwise (per the comment above) the count comes from the indexer.
272 status
.ItemCount
= indexer
.GetItemCount ();
274 // Frequent state changes are common, and there isn't
275 // a real state machine with continuity when it comes
276 // to the indexing process.  A delayed indexing task,
277 // for example, might not actually run for several
278 // seconds after it is scheduled.  In this case, the
279 // backend might be in an "Idle" state, but the
280 // indexing process clearly isn't done.  To work
281 // around this, we also track the last time the state
282 // changed.  If it's less than some threshold, then
283 // we consider ourselves to still be in the process of
// (The end of the sentence above is missing from this extract.)
// IsIndexing is set when the state is not NotApplicable and either the
// state is not Idle or the last state change was at most 30 seconds ago.
285 if (state
!= QueryableState
.NotApplicable
286 && (state
!= QueryableState
.Idle
287 || (DateTime
.Now
- last_state_change
).TotalSeconds
<= 30))
288 status
.IsIndexing
= true;
293 public QueryableState State
{
294 get { return this.state; }
296 //Logger.Log.Debug ("State {0}: {1} -> {2}", this, this.state, value);
299 this.last_state_change
= DateTime
.Now
;
303 public int ProgressPercent
{
304 get { return this.progress_percent; }
305 set { this.progress_percent = value; }
308 /////////////////////////////////////////
// Opens a named data file under SourceDataDir for reading.
//   name - file name, combined with SourceDataDir to form the path.
// Returns a FileStream opened with FileMode.Open / FileAccess.Read.  The
// caller owns the returned stream.
// NOTE(review): the statements guarded by the two existence checks are not
// visible in this extract (presumably early returns -- confirm what callers
// receive when the directory or file is missing).
310 public FileStream
ReadDataStream (string name
)
312 if (! Directory
.Exists (SourceDataDir
))
315 string path
= Path
.Combine (SourceDataDir
, name
);
317 if (!File
.Exists (path
))
320 return new FileStream (path
, System
.IO
.FileMode
.Open
, FileAccess
.Read
);
323 public string ReadDataLine (string name
)
325 FileStream stream
= ReadDataStream (name
);
330 StreamReader reader
= new StreamReader (stream
);
331 string line
= reader
.ReadLine ();
// Opens a named data file under SourceDataDir for writing, creating the
// directory first if it does not exist.
//   name - file name, combined with SourceDataDir to form the path.
// Returns a FileStream opened with FileMode.Create (a new file is created,
// or an existing one is overwritten), FileAccess.Write and
// FileShare.ReadWrite.  The caller owns the returned stream.
337 public FileStream
WriteDataStream (string name
)
339 if (! Directory
.Exists (SourceDataDir
))
340 Directory
.CreateDirectory (SourceDataDir
);
342 string path
= Path
.Combine (SourceDataDir
, name
);
344 return new FileStream (path
, System
.IO
.FileMode
.Create
, FileAccess
.Write
, FileShare
.ReadWrite
);
347 public void WriteDataLine (string name
, string line
)
350 if (! Directory
.Exists (SourceDataDir
))
353 string path
= Path
.Combine (SourceDataDir
, name
);
355 if (File
.Exists (path
))
361 FileStream stream
= WriteDataStream (name
);
362 StreamWriter writer
= new StreamWriter (stream
);
363 writer
.WriteLine (line
);
368 //////////////////////////////////////////////////////////////////////////////////
370 // More hooks. These are mostly here for the file system backend.
372 virtual protected bool PreAddIndexableHook (Indexable indexable
)
374 // By default, we like everything.
378 // If we are remapping Uris, indexables should be added to the
379 // index with the internal Uri attached. Thus the receipt
380 // will come back w/ an internal Uri. In order for change
381 // notification to work correctly, we have to map it to
383 virtual protected void PostAddHook (Indexable indexable
, IndexerAddedReceipt receipt
)
385 // Does nothing by default
388 virtual protected void PostRemoveHook (Indexable indexable
, IndexerRemovedReceipt receipt
)
390 // Does nothing by default
393 //////////////////////////////////////////////////////////////////////////////////
395 // Adding a single indexable
397 private class AddTask
: Scheduler
.Task
{
398 LuceneQueryable queryable
;
401 public AddTask (LuceneQueryable queryable
,
404 this.queryable
= queryable
;
405 this.indexable
= indexable
;
406 this.Tag
= indexable
.DisplayUri
.ToString ();
410 override protected void DoTaskReal ()
412 QueryableState old_state
= queryable
.State
;
413 queryable
.State
= QueryableState
.Indexing
;
415 if (queryable
.PreAddIndexableHook (indexable
)) {
416 queryable
.AddIndexable (indexable
);
418 if (Priority
== Scheduler
.Priority
.Immediate
)
421 queryable
.ConditionalFlush ();
424 queryable
.State
= old_state
;
427 override protected void DoCleanup ()
429 indexable
.Cleanup ();
433 public Scheduler
.Task
NewAddTask (Indexable indexable
)
436 task
= new AddTask (this, indexable
);
441 //////////////////////////////////////////////////////////////////////////////////
443 // Adding an indexable generator
445 private class AddGeneratorTask
: Scheduler
.Task
{
446 LuceneQueryable queryable
;
447 IIndexableGenerator generator
;
449 public AddGeneratorTask (LuceneQueryable queryable
,
450 IIndexableGenerator generator
)
452 this.queryable
= queryable
;
453 this.generator
= generator
;
454 this.Tag
= generator
.StatusName
;
457 override protected void DoTaskReal ()
459 // Since this is a generator, we want the task to
460 // get re-scheduled after it is run.
463 QueryableState old_state
= queryable
.State
;
464 queryable
.State
= QueryableState
.Indexing
;
466 // Number of times a null indexable was returned. We don't want
467 // to spin tightly in a loop here if we're not actually indexing
472 if (! generator
.HasNextIndexable ()) {
473 // Of course, don't reschedule if there is no more work to do.
479 generated
= generator
.GetNextIndexable ();
481 // Note that the indexable generator can return null.
482 // This means that the generator didn't have an indexable
483 // to return this time through, but it does not mean that
484 // its processing queue is empty.
485 if (generated
== null) {
488 if (misfires
> 179) // Another totally arbitrary number
494 if (queryable
.PreAddIndexableHook (generated
))
495 queryable
.AddIndexable (generated
);
497 generated
.Cleanup ();
499 // We keep adding indexables until a flush goes through.
500 } while (! queryable
.ConditionalFlush ());
502 generator
.PostFlushHook ();
504 queryable
.State
= old_state
;
507 override protected void DoCleanup ()
512 public Scheduler
.Task
NewAddTask (IIndexableGenerator generator
)
514 AddGeneratorTask task
;
515 task
= new AddGeneratorTask (this, generator
);
520 //////////////////////////////////////////////////////////////////////////////////
522 // There used to be a separate type of task for doing removes.
523 // This is all that remains of that old code.
524 public Scheduler
.Task
NewRemoveTask (Uri uri
)
527 indexable
= new Indexable (IndexableType
.Remove
, uri
);
529 return NewAddTask (indexable
);
532 //////////////////////////////////////////////////////////////////////////////////
534 public Scheduler
.Task
NewRemoveByPropertyTask (Property prop
)
536 PropertyRemovalGenerator prg
= new PropertyRemovalGenerator (driver
, prop
);
538 return NewAddTask (prg
);
541 ///////////////////////////////////////////////////////////////////////////////////
544 // An IIndexableGenerator that returns remove Indexables for
545 // all items which match a certain property
// IIndexableGenerator that emits IndexableType.Remove indexables for the
// Uris returned by driver.PropertyQuery (prop_to_match).
// NOTE(review): the declaration and advancement of idx, and the return
// statements, are not visible in this extract.
548 private class PropertyRemovalGenerator
: IIndexableGenerator
{
// Driver used to run the property query.
550 private LuceneQueryingDriver driver
;
// Property that items must match in order to be removed.
551 private Property prop_to_match
;
// Query result; stays null until HasNextIndexable populates it.
552 private Uri
[] uris_to_remove
;
// Stores the driver and the property; no query is run by the visible
// constructor statements.
555 public PropertyRemovalGenerator (LuceneQueryingDriver driver
, Property prop
)
557 this.driver
= driver
;
558 this.prop_to_match
= prop
;
// Builds a Remove indexable for uris_to_remove [idx].
// NOTE(review): in the visible code uris_to_remove is only assigned in
// HasNextIndexable, so that method apparently has to be called first.
561 public Indexable
GetNextIndexable ()
565 indexable
= new Indexable (IndexableType
.Remove
, uris_to_remove
[idx
]);
// Runs the property query on first use, then compares idx against the
// number of Uris found.
571 public bool HasNextIndexable ()
573 if (uris_to_remove
== null)
574 uris_to_remove
= this.driver
.PropertyQuery (this.prop_to_match
);
576 if (idx
< uris_to_remove
.Length
)
// Status text of the form "Removing <key>=<value>".
582 public string StatusName
{
584 return String
.Format ("Removing {0}={1}", prop_to_match
.Key
, prop_to_match
.Value
);
// Nothing to do after a flush.
588 public void PostFlushHook () { }
592 //////////////////////////////////////////////////////////////////////////////////
594 // When all other tasks are complete, we need to do a final flush.
595 // We schedule that as a maintenance task.
597 private class FinalFlushTask
: Scheduler
.Task
{
598 LuceneQueryable queryable
;
600 public FinalFlushTask (LuceneQueryable queryable
)
602 this.queryable
= queryable
;
606 override protected void DoTaskReal ()
// Ensures a FinalFlushTask for this queryable exists and hands it to the
// scheduler.  The task object is created once and cached in
// our_final_flush_task.
// NOTE(review): the closing brace of the null check is not visible in this
// extract, so whether ThisScheduler.Add runs on every call or only on the
// first is not confirmed here.
612 private void ScheduleFinalFlush ()
614 if (our_final_flush_task
== null) {
615 our_final_flush_task
= new FinalFlushTask (this);
// Tag the task with this backend's name.
617 our_final_flush_task
.Tag
= "Final Flush for " + Name
;
618 our_final_flush_task
.Priority
= Scheduler
.Priority
.Maintenance
;
619 our_final_flush_task
.SubPriority
= 100; // do this first when starting maintenance
620 our_final_flush_task
.Source
= this;
623 ThisScheduler
.Add (our_final_flush_task
);
627 //////////////////////////////////////////////////////////////////////////////////
629 // Optimize the index
631 private DateTime last_optimize_time
= DateTime
.MinValue
;
633 public DateTime LastOptimizeTime
{
634 get { return last_optimize_time; }
635 set { last_optimize_time = value; }
638 private class OptimizeTask
: Scheduler
.Task
{
639 LuceneQueryable queryable
;
641 public OptimizeTask (LuceneQueryable queryable
)
643 this.queryable
= queryable
;
646 override protected void DoTaskReal ()
648 queryable
.Optimize ();
649 queryable
.LastOptimizeTime
= DateTime
.Now
;
653 public Scheduler
.Task
NewOptimizeTask ()
656 task
= new OptimizeTask (this);
657 task
.Tag
= "Optimize " + Name
;
658 task
.Priority
= Scheduler
.Priority
.Maintenance
;
// Handler subscribed to the static OptimizeAllEvent by the constructor.
// It builds a fresh optimize task via NewOptimizeTask, overrides its
// priority with Scheduler.Priority.Delayed, and adds it to this
// queryable's scheduler.
// (The declaration of the local "task" is not visible in this extract.)
664 private void OnOptimizeAllEvent ()
667 task
= NewOptimizeTask (); // construct an optimizer task
668 task
.Priority
= Scheduler
.Priority
.Delayed
; // but boost the priority
669 ThisScheduler
.Add (task
);
// Schedules (or re-times) this queryable's cached optimize task.
// The delay is expressed in minutes:
//   - 10 minutes when more than a day has passed since last_optimize_time;
//   - the remainder of the 24-hour window otherwise (presumably via an
//     "else" that is not visible in this extract);
//   - 1/120 minute when OptimizeRightAway is set or the
//     BEAGLE_UNDER_BLUDGEON environment variable is defined.
672 private void ScheduleOptimize ()
674 double optimize_delay
;
676 // Really we only want to optimize at most once a day, even if we have
677 // indexed a ton of data
678 TimeSpan span
= DateTime
.Now
- last_optimize_time
;
679 if (span
.TotalDays
> 1.0)
680 optimize_delay
= 10.0; // minutes;
// Minutes left until a full day has elapsed since the last optimize.
682 optimize_delay
= (new TimeSpan (TimeSpan
.TicksPerDay
) - span
).TotalMinutes
;
// Create the task lazily; the same instance is reused on later calls.
684 if (our_optimize_task
== null)
685 our_optimize_task
= NewOptimizeTask ();
687 if (OptimizeRightAway
|| Environment
.GetEnvironmentVariable ("BEAGLE_UNDER_BLUDGEON") != null)
688 optimize_delay
= 1/120.0; // half a second
690 // Changing the trigger time of an already-scheduled process
691 // does what you would expect.
692 our_optimize_task
.TriggerTime
= DateTime
.Now
.AddMinutes (optimize_delay
);
694 // Adding the same task more than once is a harmless no-op.
695 ThisScheduler
.Add (our_optimize_task
);
698 //////////////////////////////////////////////////////////////////////////////////
702 // If this returns true, a task will automatically be created to
704 virtual protected bool PreChildAddHook (Indexable child
)
709 virtual protected void PreFlushHook (IndexerRequest flushed_request
)
712 virtual protected void PostFlushHook (IndexerRequest flushed_request
,
713 IndexerReceipt
[] receipts
)
716 //////////////////////////////////////////////////////////////////////////////////
// Queues an indexable in the pending request.
//   indexable - its Source is overwritten with this backend's Name before
//               it is added to pending_request.
// A final flush is scheduled after every add.
// NOTE(review): original lines between the two statements are missing from
// this extract.  Optimize and DoFlush take request_lock around
// pending_request, so a lock is presumably taken here too -- confirm.
718 protected void AddIndexable (Indexable indexable
)
720 indexable
.Source
= this.Name
;
723 pending_request
.Add (indexable
);
725 // Schedule a final flush every time we add anything.
726 // Better safe than sorry.
727 ScheduleFinalFlush ();
730 protected void Optimize ()
732 lock (request_lock
) {
733 pending_request
.OptimizeIndex
= true;
738 // Returns true if we actually did flush, false otherwise.
// Flush helper driven by the size of the pending request: under
// request_lock it tests whether pending_request holds more than 37 items.
// The current State is saved in old_state and State is switched to
// Flushing before the check.
// NOTE(review): the body of the threshold branch, the restoration of State
// and the return statements are not visible in this extract.
739 protected bool ConditionalFlush ()
741 QueryableState old_state
= State
;
742 State
= QueryableState
.Flushing
;
745 lock (request_lock
) {
746 if (pending_request
.Count
> 37) { // a totally arbitrary magic number
757 protected void Flush ()
759 QueryableState old_state
= State
;
760 State
= QueryableState
.Flushing
;
769 private void DoFlush ()
771 IndexerRequest flushed_request
;
773 lock (request_lock
) {
774 if (pending_request
.IsEmpty
)
777 flushed_request
= pending_request
;
778 pending_request
= new IndexerRequest ();
780 // We hold the request_lock when calling PreFlushHook, so
781 // that no other requests can come in until it exits.
782 PreFlushHook (flushed_request
);
785 IndexerReceipt
[] receipts
;
786 receipts
= indexer
.Flush (flushed_request
);
788 PostFlushHook (flushed_request
, receipts
);
790 // Silently return if we get a null back. This is probably
791 // a bad thing to do.
792 if (receipts
== null)
795 // Nothing happened (except maybe an optimize, which does not
796 // generate a receipt). Also do nothing.
797 if (receipts
.Length
== 0)
800 // Update the cached count of items in the driver
801 driver
.SetItemCount (indexer
.GetItemCount ());
803 // Something happened, so schedule an optimize just in case.
806 if (fa_store
!= null)
807 fa_store
.BeginTransaction ();
809 ArrayList added_uris
= new ArrayList ();
810 ArrayList removed_uris
= new ArrayList ();
812 for (int i
= 0; i
< receipts
.Length
; ++i
) {
814 if (receipts
[i
] is IndexerAddedReceipt
) {
816 IndexerAddedReceipt r
;
817 r
= (IndexerAddedReceipt
) receipts
[i
];
819 // Add the Uri to the list for our change data
820 // before doing any post-processing.
821 // This ensures that we have internal uris when
823 added_uris
.Add (r
.Uri
);
825 // Call the appropriate hook
827 // Map from internal->external Uris in the PostAddHook
828 PostAddHook (flushed_request
.GetByUri (r
.Uri
), r
);
829 } catch (Exception ex
) {
830 Logger
.Log
.Warn (ex
, "Caught exception in PostAddHook '{0}' '{1}' '{2}'",
831 r
.Uri
, r
.FilterName
, r
.FilterVersion
);
834 // Every added Uri also needs to be listed as removed,
835 // to avoid duplicate hits in the query. Since the
836 // removed Uris need to be external Uris, we add them
837 // to the list *after* post-processing.
838 removed_uris
.Add (r
.Uri
);
841 } else if (receipts
[i
] is IndexerRemovedReceipt
) {
843 IndexerRemovedReceipt r
;
844 r
= (IndexerRemovedReceipt
) receipts
[i
];
846 // Drop the removed item from the text cache
847 TextCache
.UserCache
.Delete (r
.Uri
);
850 // Call the appropriate hook
852 PostRemoveHook (flushed_request
.GetByUri (r
.Uri
), r
);
853 } catch (Exception ex
) {
854 Logger
.Log
.Warn (ex
, "Caught exception in PostRemoveHook '{0}'",
858 // Add the removed Uri to the list for our
859 // change data. This will be an external Uri
860 // when we are remapping.
861 removed_uris
.Add (r
.Uri
);
863 } else if (receipts
[i
] is IndexerChildIndexablesReceipt
) {
865 IndexerChildIndexablesReceipt r
;
866 r
= (IndexerChildIndexablesReceipt
) receipts
[i
];
868 foreach (Indexable child
in r
.Children
) {
869 bool please_add_a_new_task
= false;
872 please_add_a_new_task
= PreChildAddHook (child
);
873 } catch (InvalidOperationException ex
) {
874 // Queryable does not support adding children
875 } catch (Exception ex
) {
876 Logger
.Log
.Warn (ex
, "Caught exception in PreChildAddHook '{0}'", child
.DisplayUri
);
879 if (please_add_a_new_task
) {
880 //Logger.Log.Debug ("Adding child {0}", child.Uri);
881 Scheduler
.Task task
= NewAddTask (child
);
882 task
.SubPriority
= 1;
883 ThisScheduler
.Add (task
);
890 if (fa_store
!= null)
891 fa_store
.CommitTransaction ();
893 // Propagate the change notification to any open queries.
894 if (added_uris
.Count
> 0 || removed_uris
.Count
> 0)
895 driver
.QueryableChanged (added_uris
, removed_uris
);
898 //////////////////////////////////////////////////////////////////////////////////
901 // It is often convenient to have easy access to a FileAttributeStore
// Chooses the backing IFileAttributesStore implementation.  Virtual, so
// subclasses can substitute their own store.
// Returns FileAttributesStore_ExtendedAttribute (keyed by IndexFingerprint)
// when ExtendedAttribute.Supported is true; the second return builds a
// FileAttributesStore_Sqlite from IndexDirectory and IndexFingerprint
// (presumably the "else" branch, whose keyword is not visible in this
// extract).
904 virtual protected IFileAttributesStore
BuildFileAttributesStore ()
906 if (ExtendedAttribute
.Supported
)
907 return new FileAttributesStore_ExtendedAttribute (IndexFingerprint
);
909 return new FileAttributesStore_Sqlite (IndexDirectory
, IndexFingerprint
);
913 public FileAttributesStore FileAttributesStore
{
915 if (fa_store
== null)
916 fa_store
= new FileAttributesStore (BuildFileAttributesStore ());
921 //////////////////////////////////////////////////////////////////////////////////
923 virtual protected LuceneQueryingDriver
BuildLuceneQueryingDriver (string source_name
,
927 //return new LuceneQueryingDriver (source_name, source_version, read_only_mode);
928 return LuceneQueryingDriver
.Singleton
;