Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / LuceneQueryable.cs
blobcfaa87c4e71152d8ce132c19c1ac16932ab52faf
1 //
2 // LuceneQueryable.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
31 using Beagle.Util;
33 namespace Beagle.Daemon {
35 public abstract class LuceneQueryable : IQueryable {
37 public delegate IIndexer IndexerCreator (string name, int minor_version);
39 static private IndexerCreator indexer_hook = null;
// Write-only static property: installs the fallback factory that the
// constructor uses when LocalIndexerHook () does not supply an indexer.
public static IndexerCreator IndexerHook {
	set {
		indexer_hook = value;
	}
}
// Per-queryable indexer factory.  The default supplies no indexer
// (null), which makes the constructor fall back to the static
// IndexerHook.  Subclasses may override to provide their own indexer.
protected virtual IIndexer LocalIndexerHook ()
{
	return null;
}
50 //////////////////////////////////////////////////////////
52 public delegate void OptimizeAllHandler ();
54 static private OptimizeAllHandler OptimizeAllEvent;
// Fires OptimizeAllEvent, if anyone is subscribed.  Every read-write
// queryable subscribes OnOptimizeAllEvent in its constructor.
public static void OptimizeAll ()
{
	if (OptimizeAllEvent == null)
		return;

	OptimizeAllEvent ();
}
62 //////////////////////////////////////////////////////////
64 private Scheduler scheduler = Scheduler.Global;
65 private FileAttributesStore fa_store = null;
67 private string index_name;
68 private int minor_version;
69 private bool read_only_mode;
71 private LuceneQueryingDriver driver;
72 private IIndexer indexer = null;
73 private LuceneTaskCollector collector;
75 private LuceneQueryingDriver.UriFilter our_uri_filter;
76 private LuceneCommon.HitFilter our_hit_filter;
78 //////////////////////////////////////////////////////////
// Read-write queryable with the placeholder minor version -1.
public LuceneQueryable (string index_name)
	: this (index_name, -1, false)
{
}
// Queryable with the placeholder minor version -1 and a
// caller-chosen read-only flag.
public LuceneQueryable (string index_name, bool read_only_mode)
	: this (index_name, -1, read_only_mode)
{
}
// Read-write queryable with an explicit minor version.
public LuceneQueryable (string index_name, int minor_version)
	: this (index_name, minor_version, false)
{
}
// Main constructor; all other overloads chain here.
//
//   index_name      name handed to the querying driver and the indexer factory
//   minor_version   version number handed to the same two factories
//   read_only_mode  when true, no indexer is created and nothing is
//                   subscribed; only querying is set up
//
// Throws InvalidOperationException when a read-write queryable cannot
// obtain an indexer from either LocalIndexerHook () or the static
// IndexerHook.  (Previously this surfaced as a NullReferenceException.)
public LuceneQueryable (string index_name, int minor_version, bool read_only_mode)
{
	this.index_name = index_name;
	this.minor_version = minor_version;
	this.read_only_mode = read_only_mode;

	driver = BuildLuceneQueryingDriver (this.index_name, this.minor_version, this.read_only_mode);
	our_uri_filter = new LuceneQueryingDriver.UriFilter (this.HitIsValidOrElse);
	our_hit_filter = new LuceneCommon.HitFilter (this.HitFilter);

	// If the queryable is in read-only mode, don't
	// instantiate an indexer for it.
	if (read_only_mode)
		return;

	// Prefer the subclass-provided indexer, then the static factory.
	indexer = LocalIndexerHook ();
	if (indexer == null && indexer_hook != null)
		indexer = indexer_hook (this.index_name, this.minor_version);

	// Fail with a descriptive error instead of dereferencing null below.
	if (indexer == null)
		throw new InvalidOperationException ("No indexer available for index '" + index_name
						     + "': LocalIndexerHook returned null and no IndexerHook is installed");

	indexer.FlushEvent += OnFlushEvent;

	OptimizeAllEvent += OnOptimizeAllEvent;

	collector = new LuceneTaskCollector (indexer);
}
// The index name this queryable was constructed with.
protected string IndexName {
	get {
		return index_name;
	}
}
// The querying driver's TopDirectory.
protected string IndexDirectory {
	get {
		return driver.TopDirectory;
	}
}
// The querying driver's Fingerprint.
protected string IndexFingerprint {
	get {
		return driver.Fingerprint;
	}
}
// The querying driver built in the constructor.
protected LuceneQueryingDriver Driver {
	get {
		return driver;
	}
}
// The scheduler this queryable posts its tasks to
// (initialised to Scheduler.Global).
public Scheduler ThisScheduler {
	get {
		return scheduler;
	}
}
132 /////////////////////////////////////////
134 virtual public void Start ()
139 /////////////////////////////////////////
// Whether this queryable is willing to handle the query.  The default
// accepts everything; subclasses may override to be more selective.
public virtual bool AcceptQuery (Query query)
{
	return true;
}
146 /////////////////////////////////////////
// Validity check for a Uri coming out of the index.  The default treats
// every Uri as valid.  Uris reported invalid get scheduled for removal
// by HitIsValidOrElse () unless the queryable is read-only.
protected virtual bool HitIsValid (Uri uri)
{
	return true;
}
// Uri filter handed to the querying driver: returns HitIsValid (uri),
// and as a side effect schedules every non-valid Uri for removal
// (unless we are read-only).
private bool HitIsValidOrElse (Uri uri)
{
	if (HitIsValid (uri))
		return true;

	if (read_only_mode)
		return false;

	// FIXME: There is probably a race here --- what if the hit
	// becomes valid sometime between calling HitIsValid
	// and the removal task being executed?
	Scheduler.Task removal = NewRemoveTask (uri);
	ThisScheduler.Add (removal, Scheduler.AddType.DeferToExisting);

	return false;
}
// Hit filter handed to the querying driver.  The default lets every
// hit through; subclasses may override to reject hits.
protected virtual bool HitFilter (Hit hit)
{
	return true;
}
176 /////////////////////////////////////////
// Hook for adjusting a hit.  The default hands back the very same
// hit object, untouched.
protected virtual Hit PostProcessHit (Hit hit)
{
	return hit;
}
183 /////////////////////////////////////////
// DEPRECATED: everything is time-based now, so this no longer has any
// effect.  The default is the neutral multiplier 1.0.
protected virtual double RelevancyMultiplier (Hit hit)
{
	return 1.0;
}
// Exponential-decay multiplier: 0.5 ^ (age_in_days / half_life_days),
// where the age is the absolute distance between dt and DateTime.Now.
//
//   dt              the timestamp to score
//   half_life_days  number of days after which the multiplier halves;
//                   callers are expected to pass a positive value (a
//                   zero or negative value is not validated here)
//
// Returns 1.0 for "right now" and approaches 0 as dt gets older.
static protected double HalfLifeMultiplier (DateTime dt, int half_life_days)
{
	// Math.Abs makes the age non-negative, so a timestamp in the
	// future decays exactly like one equally far in the past.  (The
	// former "days < 0" special case could never trigger and has
	// been removed.)
	double days = Math.Abs ((DateTime.Now - dt).TotalDays);

	return Math.Pow (0.5, days / (double) half_life_days);
}
// FIXME: A decaying half-life is a little sketchy, since data
// will eventually decay beyond the epsilon and be dropped
// from the results entirely, which is almost never what we
// want, particularly in searches with only a small number of
// results.  But with a default half-life of 6 months, it'll
// take over 13 years to fully decay outside the epsilon on
// this multiplier alone.
//
// Convenience overload using the default relevancy half-life
// of six months (182 days).
static protected double HalfLifeMultiplier (DateTime time)
{
	const int default_half_life_days = 182;

	return HalfLifeMultiplier (time, default_half_life_days);
}
// Computes the best (largest) half-life multiplier over a set of
// timestamp properties of a hit.
//
//   hit                 the hit whose properties are consulted
//   default_multiplier  returned when none of the properties yields a value
//   properties          property keys (strings) to look up on the hit;
//                       entries that are not strings are skipped
//
// Each property that is present on the hit is parsed with
// StringFu.StringToDateTime and scored with the default six-month
// half-life.
static protected double HalfLifeMultiplierFromProperty (Hit hit,
							double default_multiplier,
							params object [] properties)
{
	double best_m = -1.0;

	// An explicit null may be passed for a params array.
	if (properties != null) {
		foreach (object obj in properties) {
			// Ignore anything that is not a property key rather
			// than indexing the hit with null.
			string key = obj as string;
			if (key == null)
				continue;

			string val = hit [key];
			if (val == null)
				continue;

			DateTime dt = StringFu.StringToDateTime (val);
			// Default half-life: 182 days == six months
			double this_m = HalfLifeMultiplier (dt);
			if (this_m > best_m)
				best_m = this_m;
		}
	}

	// Multipliers are never negative, so -1.0 means "nothing found".
	if (best_m < 0)
		best_m = default_multiplier;

	return best_m;
}
236 /////////////////////////////////////////
238 // *** FIXME *** FIXME *** FIXME *** FIXME ***
239 // When we rename a directory, we need to somehow
240 // propagate change information to files under that
241 // directory. Example: say that file foo is in
242 // directory bar, and there is an open query that
243 // matches foo. The tile probably says something
244 // like "foo, in folder bar".
245 // Then assume I rename bar to baz. That notification
246 // will go out, so a query matching bar will get
247 // updated... but the query matching foo will not.
248 // What should really happen is that the tile
249 // should change to say "foo, in folder baz".
250 // But making that work will require some hacking
251 // on the QueryResults.
252 // *** FIXME *** FIXME *** FIXME *** FIXME ***
// Change notification payload built in OnFlushEvent and consumed by
// DoQuery.
private class ChangeData : IQueryableChangeData {

	// Fed back to LuceneQueryingDriver.DoQuery as the search
	// subset, so these must be internal Uris when we are
	// remapping.
	public ICollection AddedUris;

	// Reported straight to clients in Subtract events, so these
	// must be external Uris when we are remapping.
	public ICollection RemovedUris;
}
// Runs a query against the index.
//
// With no change data, the whole index is searched.  With change
// data, the removed Uris are first subtracted from the result, and
// the search is then restricted to the added Uris; if nothing was
// added, no search is performed at all.
public void DoQuery (Query query,
		     IQueryResult query_result,
		     IQueryableChangeData i_change_data)
{
	ChangeData delta = (ChangeData) i_change_data;

	// null means "search everything"
	ICollection search_subset = null;

	if (delta != null) {

		ICollection removed = delta.RemovedUris;
		if (removed != null)
			query_result.Subtract (removed);

		// Nothing added: this change cannot have any further
		// effect on an outstanding live query.
		search_subset = delta.AddedUris;
		if (search_subset == null || search_subset.Count == 0)
			return;
	}

	Driver.DoQuery (query, query_result, search_subset, our_uri_filter, our_hit_filter);
}
296 /////////////////////////////////////////
// Looks the Uri up in the user text cache and, if it is there, uses
// the cached text to generate a snippet for the query terms.
//
// Returns null when the text cache has no reader for the Uri;
// otherwise whatever SnippetFu.GetSnippet produces.
protected string GetSnippetFromTextCache (string [] query_terms, Uri uri)
{
	TextReader reader = TextCache.UserCache.GetReader (uri);
	if (reader == null)
		return null;

	// Close the reader even if snippet generation throws.
	try {
		return SnippetFu.GetSnippet (query_terms, reader);
	} finally {
		reader.Close ();
	}
}
// Produces a snippet for a hit from the text cache, keyed by the
// hit's Uri.
// When remapping, override this with
//   return GetSnippetFromTextCache (query_terms, remapping_fn (hit.Uri))
public virtual string GetSnippet (string [] query_terms, Hit hit)
{
	Uri cache_key = hit.Uri;

	return GetSnippetFromTextCache (query_terms, cache_key);
}
321 /////////////////////////////////////////
// Number of items in the index.  Without an indexer (read-only mode)
// the count comes from the querying driver; otherwise the indexer is
// asked.
public virtual int GetItemCount ()
{
	return indexer == null
		? driver.GetItemCount ()
		: indexer.GetItemCount ();
}
333 /////////////////////////////////////////
// Opens the file <PathFinder.IndexDir>/<IndexName>/<name> for reading.
// Returns null when the file does not exist.  The caller owns the
// returned stream.
public FileStream ReadDataStream (string name)
{
	string index_dir = Path.Combine (PathFinder.IndexDir, this.IndexName);
	string path = Path.Combine (index_dir, name);

	if (File.Exists (path))
		return new FileStream (path, FileMode.Open, FileAccess.Read);

	return null;
}
// Reads the first line of the data file with the given name (see
// ReadDataStream for where it lives).
//
// Returns null when the file does not exist, or when it is empty
// (StreamReader.ReadLine returns null at end of input).
public string ReadDataLine (string name)
{
	FileStream stream = ReadDataStream (name);

	if (stream == null)
		return null;

	// Disposing the reader also closes the underlying stream, and
	// does so even when ReadLine throws.
	using (StreamReader reader = new StreamReader (stream)) {
		return reader.ReadLine ();
	}
}
// Creates (or truncates) the file <PathFinder.IndexDir>/<IndexName>/<name>
// and opens it for writing.  The caller owns the returned stream.
public FileStream WriteDataStream (string name)
{
	string index_dir = Path.Combine (PathFinder.IndexDir, this.IndexName);
	string path = Path.Combine (index_dir, name);

	return new FileStream (path, FileMode.Create, FileAccess.Write);
}
// Stores a single line in the data file with the given name, replacing
// any previous content.  Passing a null line deletes the file instead
// (if it exists).
public void WriteDataLine (string name, string line)
{
	if (line == null) {
		string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);

		if (File.Exists (path))
			File.Delete (path);

		return;
	}

	// Both the stream and the writer are released even when the
	// write fails; disposing the writer flushes it first.
	using (FileStream stream = WriteDataStream (name))
	using (StreamWriter writer = new StreamWriter (stream)) {
		writer.WriteLine (line);
	}
}
386 //////////////////////////////////////////////////////////////////////////////////
// Task collector attached to the add/remove/optimize tasks created by
// this queryable.  Its only real work is flushing the indexer in the
// post-task hook.
private class LuceneTaskCollector : Scheduler.ITaskCollector {

	IIndexer target;

	public LuceneTaskCollector (IIndexer indexer)
	{
		target = indexer;
	}

	public double GetMinimumWeight ()
	{
		return 0.0;
	}

	public double GetMaximumWeight ()
	{
		// FIXME: this is totally arbitrary
		return 37.0;
	}

	public void PreTaskHook ()
	{
		// Nothing to prepare.
	}

	public void PostTaskHook ()
	{
		target.Flush ();
	}
}
420 //////////////////////////////////////////////////////////////////////////////////
422 // Adding a single indexable
424 private delegate bool PreAddHookDelegate (Indexable indexable);
// Scheduler task that adds one indexable to the indexer, unless the
// pre-add hook vetoes it.  Tagged with the indexable's display Uri.
private class AddTask : Scheduler.Task {
	IIndexer target;
	Indexable item;
	PreAddHookDelegate gate;

	public AddTask (IIndexer indexer,
			Indexable indexable,
			PreAddHookDelegate pre_add_hook)
	{
		target = indexer;
		item = indexable;
		gate = pre_add_hook;
		this.Tag = indexable.DisplayUri.ToString ();
		this.Weight = 1;
	}

	override protected void DoTaskReal ()
	{
		// A missing hook means "always add".
		if (gate != null && ! gate (item))
			return;

		target.Add (item);
	}
}
// Called by the add tasks before an indexable is handed to the
// indexer; returning false skips the add.  The default allows
// everything.
protected virtual bool PreAddHook (Indexable indexable)
{
	return true;
}
// If we are remapping Uris, indexables should be added to the
// index with the internal Uri attached.  Thus the receipt
// will come back with an internal Uri.  In order for change
// notification to work correctly, we have to map it to
// an external Uri here.
protected virtual void PostAddHook (IndexerAddedReceipt receipt)
{
	// The default implementation leaves the receipt alone.
}
// Builds (but does not schedule) a task that adds a single indexable,
// gated by PreAddHook and attached to our task collector.
public Scheduler.Task NewAddTask (Indexable indexable)
{
	PreAddHookDelegate gate = new PreAddHookDelegate (this.PreAddHook);

	AddTask task = new AddTask (this.indexer, indexable, gate);
	task.Collector = collector;

	return task;
}
473 //////////////////////////////////////////////////////////////////////////////////
475 // Adding an indexable generator
// Scheduler task that drains an IIndexableGenerator in batches.  Each
// run pulls up to hard_wired_generation_count indexables, adds those
// that pass the pre-add hook, and flushes the indexer if anything was
// added.  The task keeps rescheduling itself until the generator
// reports that it has nothing left.
private class AddGeneratorTask : Scheduler.Task {
	IIndexer target;
	IIndexableGenerator source;
	PreAddHookDelegate gate;

	// Invoked after a batch of Indexables has been processed,
	// just prior to flushing the indexer.
	Scheduler.Hook before_flush;

	// FIXME: number of items generated
	// from the Indexable shouldn't be
	// hard-wired
	const int hard_wired_generation_count = 30;

	public AddGeneratorTask (IIndexer indexer,
				 IIndexableGenerator generator,
				 PreAddHookDelegate pre_add_hook,
				 Scheduler.Hook pre_flush_hook)
	{
		target = indexer;
		source = generator;
		gate = pre_add_hook;
		before_flush = pre_flush_hook;
		this.Tag = generator.StatusName;
		this.Weight = hard_wired_generation_count;
	}

	override protected void DoTaskReal ()
	{
		// Generators get re-scheduled after every run...
		Reschedule = true;

		bool added_any = false;
		int remaining = hard_wired_generation_count;

		while (remaining-- > 0) {
			if (! source.HasNextIndexable ()) {
				// ...until there is no more work to do.
				Reschedule = false;
				break;
			}

			Indexable next = source.GetNextIndexable ();

			// A null indexable means the generator had nothing
			// to hand out this time through; it does not mean
			// that its processing queue is empty.
			if (next == null)
				break;

			// A missing hook means "always add".
			if (gate != null && ! gate (next))
				continue;

			target.Add (next);
			added_any = true;
		}

		if (! added_any)
			return;

		if (before_flush != null)
			before_flush ();
		target.Flush ();
	}
}
// Builds (but does not schedule) a generator-priority task that drains
// the generator in batches.  pre_flush_hook may be null; when given, it
// runs just before each batch is flushed.
public Scheduler.Task NewAddTask (IIndexableGenerator generator, Scheduler.Hook pre_flush_hook)
{
	PreAddHookDelegate gate = new PreAddHookDelegate (this.PreAddHook);

	AddGeneratorTask task = new AddGeneratorTask (this.indexer, generator, gate, pre_flush_hook);
	task.Priority = Scheduler.Priority.Generator;

	return task;
}
// Same as NewAddTask (generator, pre_flush_hook) without a
// pre-flush hook.
public Scheduler.Task NewAddTask (IIndexableGenerator generator)
{
	Scheduler.Hook no_hook = null;

	return NewAddTask (generator, no_hook);
}
560 //////////////////////////////////////////////////////////////////////////////////
562 // Removing a single item from the index
564 private delegate bool PreRemoveHookDelegate (Uri uri);
// Scheduler task that removes one Uri from the index, unless the
// pre-remove hook vetoes it.  Tagged with the Uri's string form.
private class RemoveTask : Scheduler.Task {
	IIndexer target;
	Uri doomed;
	PreRemoveHookDelegate gate;

	public RemoveTask (IIndexer indexer,
			   Uri uri,
			   PreRemoveHookDelegate pre_remove_hook)
	{
		target = indexer;
		doomed = uri;
		gate = pre_remove_hook;

		this.Tag = uri.ToString ();
		this.Weight = 0.24999; // this is arbitrary
	}

	override protected void DoTaskReal ()
	{
		// A missing hook means "always remove".
		if (gate != null && ! gate (doomed))
			return;

		if (doomed == null)
			return;

		target.Remove (doomed);
	}
}
// Called by the remove task before a Uri is removed from the index;
// returning false skips the removal.  The default allows everything.
protected virtual bool PreRemoveHook (Uri uri)
{
	return true;
}
// If we are remapping Uris, receipt.Uri will be passed in as an
// internal Uri.  It needs to be mapped to an external Uri here for
// change notification to work properly.
protected virtual void PostRemoveHook (IndexerRemovedReceipt receipt)
{
	// The default implementation leaves the receipt alone.
}
// Builds (but does not schedule) a task that removes a single Uri,
// gated by PreRemoveHook and attached to our task collector.
public Scheduler.Task NewRemoveTask (Uri uri)
{
	PreRemoveHookDelegate gate = new PreRemoveHookDelegate (this.PreRemoveHook);

	RemoveTask task = new RemoveTask (this.indexer, uri, gate);
	task.Collector = collector;

	return task;
}
614 //////////////////////////////////////////////////////////////////////////////////
616 // Optimize the index
// Scheduler task that asks the indexer to optimize the index.
private class OptimizeTask : Scheduler.Task {
	IIndexer target;

	public OptimizeTask (IIndexer indexer)
	{
		target = indexer;
	}

	override protected void DoTaskReal ()
	{
		target.Optimize ();
	}
}
// Builds (but does not schedule) a delayed-priority task that
// optimizes this queryable's index, attached to our task collector.
public Scheduler.Task NewOptimizeTask ()
{
	string tag = "Optimize " + IndexName;

	OptimizeTask task = new OptimizeTask (this.indexer);
	task.Tag = tag;
	task.Priority = Scheduler.Priority.Delayed;
	task.Collector = collector;

	return task;
}
// Handler for the static OptimizeAllEvent: schedules an optimize task
// for this queryable's index.
private void OnOptimizeAllEvent ()
{
	ThisScheduler.Add (NewOptimizeTask ());
}
649 //////////////////////////////////////////////////////////////////////////////////
651 // Other hooks
// Called for every child indexable reported by the indexer.  If this
// returns true, a task will automatically be created to add the
// child.  Note that the PreAddHook will also be called, as usual.
// The default accepts every child.
protected virtual bool PreChildAddHook (Indexable child)
{
	return true;
}
661 //////////////////////////////////////////////////////////////////////////////////
// Handler for the indexer's FlushEvent.  Walks the receipts of the
// flush, runs the matching post-processing hook for each one, schedules
// add tasks for accepted child indexables, and finally notifies open
// queries about the added and removed Uris.
//
//   source    the indexer that flushed (not used here)
//   receipts  the receipts of the flush; null or empty arrays are ignored
//
// If a file attributes store already exists when the flush arrives, the
// whole batch is wrapped in one BeginTransaction/CommitTransaction pair
// on it.
private void OnFlushEvent (IIndexer source, IndexerReceipt [] receipts)
{
	// Just ignore flush-complete notifications
	// and empty arrays of receipts.
	if (receipts == null || receipts.Length == 0)
		return;

	// Snapshot the store so that Begin and Commit are always paired
	// on the same object: a hook that touches the FileAttributesStore
	// property can lazily create fa_store while we are in the loop,
	// and that store must not be committed without a prior Begin.
	FileAttributesStore store = fa_store;
	if (store != null)
		store.BeginTransaction ();

	ArrayList added_uris = new ArrayList ();
	ArrayList removed_uris = new ArrayList ();

	try {
		for (int i = 0; i < receipts.Length; ++i) {

			if (receipts [i] is IndexerAddedReceipt) {

				IndexerAddedReceipt r;
				r = (IndexerAddedReceipt) receipts [i];

				// Add the Uri to the list for our change data
				// before doing any post-processing.
				// This ensures that we have internal uris when
				// we are remapping.
				added_uris.Add (r.Uri);

				// Call the appropriate hook
				try {
					// Map from internal->external Uris in the PostAddHook
					PostAddHook (r);
				} catch (Exception ex) {
					Logger.Log.Warn ("Caught exception in PostAddHook '{0}' '{1}' '{2}'",
							 r.Uri, r.FilterName, r.FilterVersion);
					Logger.Log.Warn (ex);
				}

				// Every added Uri also needs to be listed as removed,
				// to avoid duplicate hits in the query.  Since the
				// removed Uris need to be external Uris, we add them
				// to the list *after* post-processing.
				removed_uris.Add (r.Uri);

			} else if (receipts [i] is IndexerRemovedReceipt) {

				IndexerRemovedReceipt r;
				r = (IndexerRemovedReceipt) receipts [i];

				// Drop the removed item from the text cache
				TextCache.UserCache.Delete (r.Uri);

				// Call the appropriate hook
				try {
					PostRemoveHook (r);
				} catch (Exception ex) {
					Logger.Log.Warn ("Caught exception in PostRemoveHook '{0}'",
							 r.Uri);
					Logger.Log.Warn (ex);
				}

				// Add the removed Uri to the list for our
				// change data.  This will be an external Uri
				// when we are remapping.
				removed_uris.Add (r.Uri);

			} else if (receipts [i] is IndexerChildIndexablesReceipt) {

				IndexerChildIndexablesReceipt r;
				r = (IndexerChildIndexablesReceipt) receipts [i];

				foreach (Indexable child in r.Children) {
					bool please_add_a_new_task = false;

					try {
						please_add_a_new_task = PreChildAddHook (child);
					} catch (InvalidOperationException) {
						// Queryable does not support adding children
					} catch (Exception ex) {
						Logger.Log.Warn ("Caught exception in PreChildAddHook '{0}'", child.DisplayUri);
						Logger.Log.Warn (ex);
					}

					if (please_add_a_new_task) {
						Scheduler.Task task = NewAddTask (child);
						// FIXME: Probably need a better priority than this
						task.Priority = Scheduler.Priority.Generator;
						ThisScheduler.Add (task);
					}
				}
			}
		}
	} finally {
		// Never leave the transaction open, even if something
		// outside the guarded hooks threw.
		if (store != null)
			store.CommitTransaction ();
	}

	// Propagate the change notification to any open queries.
	if (added_uris.Count > 0 || removed_uris.Count > 0) {
		ChangeData change_data;
		change_data = new ChangeData ();
		change_data.AddedUris = added_uris;
		change_data.RemovedUris = removed_uris;

		QueryDriver.QueryableChanged (this, change_data);
	}
}
770 //////////////////////////////////////////////////////////////////////////////////
// It is often convenient to have easy access to a FileAttributesStore.
//
// Picks the backing store: extended attributes when
// ExtendedAttribute.Supported says so, otherwise the Sqlite-based
// store in the index directory.  Subclasses may override.
protected virtual IFileAttributesStore BuildFileAttributesStore ()
{
	if (! ExtendedAttribute.Supported)
		return new FileAttributesStore_Sqlite (IndexDirectory, IndexFingerprint);

	return new FileAttributesStore_ExtendedAttribute (IndexFingerprint);
}
// The file attributes store for this queryable, created on first
// access from BuildFileAttributesStore () and then reused.
public FileAttributesStore FileAttributesStore {
	get {
		if (fa_store != null)
			return fa_store;

		fa_store = new FileAttributesStore (BuildFileAttributesStore ());
		return fa_store;
	}
}
793 //////////////////////////////////////////////////////////////////////////////////
// Factory for the querying driver, called from the constructor.  The
// default creates a plain LuceneQueryingDriver from the three
// arguments; subclasses may override to supply a different driver.
protected virtual LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								  int minor_version,
								  bool read_only_mode)
{
	LuceneQueryingDriver built = new LuceneQueryingDriver (index_name, minor_version, read_only_mode);

	return built;
}