Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / LuceneQueryable.cs
blob35fc45c31ea2e968b4c2e4c1cc16192fee521eb7
1 //
2 // LuceneQueryable.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
31 using Beagle.Util;
33 namespace Beagle.Daemon {
35 public abstract class LuceneQueryable : IQueryable {
37 static public bool Debug = false;
38 static public bool OptimizeRightAway = false;
40 public delegate IIndexer IndexerCreator (string name, int minor_version);
42 static private IndexerCreator indexer_hook = null;
44 static public IndexerCreator IndexerHook {
45 set { indexer_hook = value; }
48 virtual protected IIndexer LocalIndexerHook ()
50 return null;
53 //////////////////////////////////////////////////////////
55 public delegate void OptimizeAllHandler ();
57 static private OptimizeAllHandler OptimizeAllEvent;
59 static public void OptimizeAll ()
61 if (OptimizeAllEvent != null)
62 OptimizeAllEvent ();
65 //////////////////////////////////////////////////////////
67 private Scheduler scheduler = Scheduler.Global;
68 private FileAttributesStore fa_store = null;
70 private string index_name;
71 private int minor_version;
72 private bool read_only_mode;
74 private LuceneQueryingDriver driver;
75 private IIndexer indexer = null;
77 private LuceneQueryingDriver.UriFilter our_uri_filter;
78 private LuceneCommon.HitFilter our_hit_filter;
79 private Scheduler.Task our_final_flush_task = null;
80 private Scheduler.Task our_optimize_task = null;
82 private object request_lock = new object ();
83 private IndexerRequest pending_request = new IndexerRequest ();
85 //////////////////////////////////////////////////////////
87 public LuceneQueryable (string index_name) : this (index_name, -1, false) { }
89 public LuceneQueryable (string index_name, bool read_only_mode) : this (index_name, -1, read_only_mode) { }
91 public LuceneQueryable (string index_name, int minor_version) : this (index_name, minor_version, false) { }
// Primary constructor: every other overload funnels into this one.
// Builds the querying driver and the two filter delegates; unless the
// queryable is read-only it also obtains an indexer, registers for
// "optimize all" and shutdown notifications, and schedules an optimize.
public LuceneQueryable (string index_name, int minor_version, bool read_only_mode)
{
	this.index_name = index_name;
	this.minor_version = minor_version;
	this.read_only_mode = read_only_mode;

	driver = BuildLuceneQueryingDriver (this.index_name, this.minor_version, this.read_only_mode);
	our_uri_filter = new LuceneQueryingDriver.UriFilter (this.HitIsValid);
	our_hit_filter = new LuceneCommon.HitFilter (this.HitFilter);

	// A queryable in read-only mode never gets an indexer, and
	// none of the write-side plumbing below is hooked up for it.
	if (! read_only_mode) {
		// The per-instance hook wins; the static hook is only
		// consulted when the per-instance one yields nothing.
		indexer = LocalIndexerHook ();
		if (indexer == null) {
			if (indexer_hook != null)
				indexer = indexer_hook (this.index_name, this.minor_version);
		}

		OptimizeAllEvent += OnOptimizeAllEvent;

		// Schedule an optimize, just in case
		ScheduleOptimize ();

		Shutdown.ShutdownEvent += new Shutdown.ShutdownHandler (OnShutdownEvent);
	}
}
120 protected string IndexName {
121 get { return index_name; }
124 protected string IndexDirectory {
125 get { return driver.TopDirectory; }
128 protected string IndexFingerprint {
129 get { return driver.Fingerprint; }
132 protected LuceneQueryingDriver Driver {
133 get { return driver; }
136 public Scheduler ThisScheduler {
137 get { return scheduler; }
140 /////////////////////////////////////////
142 virtual public void Start ()
147 /////////////////////////////////////////
149 virtual protected void ShutdownHook ()
154 private void OnShutdownEvent ()
156 lock (request_lock)
157 pending_request.Cleanup ();
159 try {
160 ShutdownHook ();
161 } catch (Exception ex) {
162 Logger.Log.Warn (ex, "Caught exception in shutdown hook");
166 /////////////////////////////////////////
168 virtual public bool AcceptQuery (Query query)
170 // Accept all queries by default.
171 return true;
174 /////////////////////////////////////////
176 virtual protected bool HitIsValid (Uri uri)
178 return true;
181 virtual protected bool HitFilter (Hit hit)
183 return true;
186 /////////////////////////////////////////
188 // DEPRECATED: This does nothing, since everything is now
189 // time-based.
190 virtual protected double RelevancyMultiplier (Hit hit)
192 return 1.0;
// Returns a relevancy multiplier that is 1.0 for a timestamp equal to
// "now" and halves for every half_life_days days of distance between
// dt and now.
//
// dt             -- the timestamp to score.
// half_life_days -- number of days after which the multiplier is 0.5.
//                   A non-positive value disables decay (returns 1.0)
//                   instead of dividing by zero and producing NaN or 0.
static protected double HalfLifeMultiplier (DateTime dt, int half_life_days)
{
	if (half_life_days <= 0)
		return 1.0;

	// Math.Abs makes the distance symmetric, so a timestamp in the
	// future decays exactly like one equally far in the past.  The
	// distance can therefore never be negative.
	double days = Math.Abs ((DateTime.Now - dt).TotalDays);

	return Math.Pow (0.5, days / (double) half_life_days);
}
203 // FIXME: A decaying half-life is a little sketchy, since data
204 // will eventually decay beyond the epsilon and be dropped
205 // from the results entirely, which is almost never what we
206 // want, particularly in searches with a small number of
207 // results. But with a default half-life of 6 months, it'll
208 // take over 13 years to fully decay outside the epsilon on
209 // this multiplier alone.
210 static protected double HalfLifeMultiplier (DateTime time)
212 // Default relevancy half-life is six months.
213 return HalfLifeMultiplier (time, 182);
// Computes the best (largest) half-life multiplier over a set of
// date-valued properties of a hit.
//
// hit                -- the hit whose properties are inspected.
// default_multiplier -- returned when none of the named properties
//                       is present on the hit.
// properties         -- property names; entries that are not strings
//                       are ignored.
static protected double HalfLifeMultiplierFromProperty (Hit hit,
							 double default_multiplier,
							 params object [] properties)
{
	double best_m = -1.0;

	if (properties != null) {
		foreach (object obj in properties) {
			// "as" yields null for anything that is not a string;
			// skip those rather than indexing the hit with a null key.
			string key = obj as string;
			if (key == null)
				continue;

			string val = hit [key];
			if (val == null)
				continue;

			DateTime dt = StringFu.StringToDateTime (val);
			double this_m;
			this_m = HalfLifeMultiplier (dt, 182); /* 182 days == six months */
			if (this_m > best_m)
				best_m = this_m;
		}
	}

	// Multipliers are never negative, so a negative best_m means
	// that no property produced a value.
	if (best_m < 0)
		best_m = default_multiplier;
	return best_m;
}
239 /////////////////////////////////////////
241 // *** FIXME *** FIXME *** FIXME *** FIXME ***
242 // When we rename a directory, we need to somehow
243 // propagate change information to files under that
244 // directory. Example: say that file foo is in
245 // directory bar, and there is an open query that
246 // matches foo. The tile probably says something
247 // like "foo, in folder bar".
248 // Then assume I rename bar to baz. That notification
249 // will go out, so a query matching bar will get
250 // updated... but the query matching foo will not.
251 // What should really happen is that the tile
252 // should change to say "foo, in folder baz".
253 // But making that work will require some hacking
254 // on the QueryResults.
255 // *** FIXME *** FIXME *** FIXME *** FIXME ***
257 private class ChangeData : IQueryableChangeData {
259 // These get fed back to LuceneQueryingDriver.DoQuery
260 // as a search subset, and hence need to be internal
261 // Uris when we are remapping.
262 public ICollection AddedUris;
264 // These get reported directly to clients in
265 // Subtract events, and thus need to be external Uris
266 // when we are remapping.
267 public ICollection RemovedUris;
// Runs a query against this backend.
//
// With no change data this is an initial query: index listeners get
// nothing, everything else is handed to the driver unrestricted.
// With change data this is a live-query update: removed Uris are
// subtracted from the result, and the added Uris are either turned
// into synthetic hits (index listeners) or fed to the driver as the
// subset to search.
public void DoQuery (Query query,
		     IQueryResult query_result,
		     IQueryableChangeData i_change_data)
{
	ChangeData changes = (ChangeData) i_change_data;

	if (changes == null) {
		// Index listeners never return any initial matches.
		if (query.IsIndexListener)
			return;

		ICollection no_subset = null;
		Driver.DoQuery (query,
				query_result,
				no_subset,
				our_uri_filter,
				our_hit_filter);
		return;
	}

	if (changes.RemovedUris != null)
		query_result.Subtract (changes.RemovedUris);

	// If nothing was added, this change cannot have any further
	// effect on an outstanding live query.
	if (changes.AddedUris == null || changes.AddedUris.Count == 0)
		return;

	ICollection added_uris = changes.AddedUris;

	if (! query.IsIndexListener) {
		Driver.DoQuery (query,
				query_result,
				added_uris,
				our_uri_filter,
				our_hit_filter);
		return;
	}

	// Index listeners do not need a real query: build synthetic
	// hits for every added Uri that passes both filters.
	ArrayList synthetic_hits = new ArrayList ();

	foreach (Uri uri in added_uris) {
		bool keep = true;

		if (our_uri_filter != null) {
			try {
				keep = our_uri_filter (uri);
			} catch (Exception ex) {
				// A throwing filter counts as a rejection.
				keep = false;
				Log.Warn (ex, "Caught an exception in HitIsValid for {0}", uri);
			}
		}

		if (! keep)
			continue;

		Hit hit = new Hit ();
		hit.Uri = uri;

		if (our_hit_filter != null) {
			try {
				keep = our_hit_filter (hit);
			} catch (Exception ex) {
				keep = false;
				Log.Warn (ex, "Caught an exception in HitFilter for {0}", hit.Uri);
			}
		}

		if (keep)
			synthetic_hits.Add (hit);
	}

	if (synthetic_hits.Count > 0)
		query_result.Add (synthetic_hits);
}
344 /////////////////////////////////////////
// Generates a snippet for uri from the cached text of the document.
//
// query_terms -- the terms handed to SnippetFu.GetSnippet.
// uri         -- the Uri to look up in the user text cache.
//
// Returns the snippet, or null when the text cache has no reader
// for uri.
protected string GetSnippetFromTextCache (string [] query_terms, Uri uri)
{
	// Look up the hit in our text cache.  If it is there,
	// use the cached version to generate a snippet.
	TextReader reader;
	reader = TextCache.UserCache.GetReader (uri);
	if (reader == null)
		return null;

	// Close the reader even when snippet generation throws, so
	// the underlying cache file is not left open.
	try {
		return SnippetFu.GetSnippet (query_terms, reader);
	} finally {
		reader.Close ();
	}
}
362 // When remapping, override this with
363 // return GetSnippetFromTextCache (query_terms, remapping_fn (hit.Uri))
364 virtual public string GetSnippet (string [] query_terms, Hit hit)
366 return GetSnippetFromTextCache (query_terms, hit.Uri);
369 /////////////////////////////////////////
371 private int progress_percent = -1;
372 private QueryableState state = QueryableState.Idle;
373 private DateTime last_state_change = DateTime.MinValue;
// Builds a snapshot of this backend's state: current state, progress
// percentage, item count and whether it should be considered to be
// indexing right now.
public QueryableStatus GetQueryableStatus ()
{
	QueryableStatus snapshot = new QueryableStatus ();

	snapshot.State = state;
	snapshot.ProgressPercent = progress_percent;

	// Without an indexer (read-only mode) the item count has to
	// come from the driver instead.
	if (indexer != null)
		snapshot.ItemCount = indexer.GetItemCount ();
	else
		snapshot.ItemCount = driver.GetItemCount ();

	// State changes are frequent and there is no real state
	// machine behind them: a delayed indexing task may not run
	// for several seconds after being scheduled, so the backend
	// can look "Idle" while indexing clearly is not finished.
	// To paper over that, an Idle backend whose state changed
	// within the last 30 seconds still counts as indexing.
	if (state != QueryableState.NotApplicable) {
		if (state != QueryableState.Idle)
			snapshot.IsIndexing = true;
		else if ((DateTime.Now - last_state_change).TotalSeconds <= 30)
			snapshot.IsIndexing = true;
	}

	return snapshot;
}
408 public QueryableState State {
409 get { return this.state; }
410 set {
411 //Logger.Log.Debug ("State {0}: {1} -> {2}", this, this.state, value);
413 this.state = value;
414 this.last_state_change = DateTime.Now;
418 public int ProgressPercent {
419 get { return this.progress_percent; }
420 set { this.progress_percent = value; }
423 /////////////////////////////////////////
425 public FileStream ReadDataStream (string name)
427 string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);
429 if (!File.Exists (path))
430 return null;
432 return new FileStream (path, System.IO.FileMode.Open, FileAccess.Read);
// Reads the first line of the named data file belonging to this index.
//
// name -- file name, resolved by ReadDataStream.
//
// Returns the first line, or null when ReadDataStream returns null
// (the file does not exist) or the file is empty.
public string ReadDataLine (string name)
{
	FileStream stream = ReadDataStream (name);

	if (stream == null)
		return null;

	// Dispose both the stream and the reader on every path,
	// including when construction of the reader or the read fails.
	using (stream) {
		using (StreamReader reader = new StreamReader (stream)) {
			return reader.ReadLine ();
		}
	}
}
449 public FileStream WriteDataStream (string name)
451 string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);
453 return new FileStream (path, System.IO.FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
// Writes a single line to the named data file belonging to this index,
// replacing any previous content.
//
// name -- file name inside this index's directory.
// line -- the text to write; null deletes the file if it exists.
public void WriteDataLine (string name, string line)
{
	if (line == null) {
		string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);

		if (File.Exists (path))
			File.Delete (path);

		return;
	}

	// Dispose both the stream and the writer on every path, so a
	// failed write does not leave the file handle open.
	using (FileStream stream = WriteDataStream (name)) {
		using (StreamWriter writer = new StreamWriter (stream)) {
			writer.WriteLine (line);
		}
	}
}
473 // Everything needed to write the attributes of a file after all its children
474 // are indexed
475 private class ParentIndexableInfo {
476 public Indexable Indexable;
477 public IndexerAddedReceipt Receipt;
478 public DateTime LastChildIndexTime;
479 public int NumChildLeft;
482 private Hashtable parent_indexable_table = UriFu.NewHashtable ();
484 //////////////////////////////////////////////////////////////////////////////////
486 // More hooks. These are mostly here for the file system backend.
488 virtual protected bool PreAddIndexableHook (Indexable indexable)
490 // By default, we like everything.
491 return true;
494 // If we are remapping Uris, indexables should be added to the
495 // index with the internal Uri attached.  Thus the receipt
496 // will come back w/ an internal Uri. In order for change
497 // notification to work correctly, we have to map it to
498 // an external Uri.
499 virtual protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
501 // Does nothing by default
504 // Inform backends that the indexable is completely indexed including all children
505 // Pass in the top-level parent indexable, indexeradded receipt for that indexable
506 // and the time when the receipt about the last child indexing was received
507 virtual protected void PostChildrenIndexedHook (Indexable indexable, IndexerAddedReceipt receipt, DateTime Mtime)
509 // Does nothing by default
512 virtual protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
514 // Does nothing by default
517 //////////////////////////////////////////////////////////////////////////////////
519 // Adding a single indexable
// Scheduler task that adds (or removes) one indexable through its
// owning queryable.
private class AddTask : Scheduler.Task {
	LuceneQueryable queryable;
	Indexable indexable;

	// queryable -- the backend the indexable is added through.
	// indexable -- the item to add; its DisplayUri becomes the task tag.
	public AddTask (LuceneQueryable queryable,
			Indexable indexable)
	{
		this.queryable = queryable;
		this.indexable = indexable;
		this.Tag = indexable.DisplayUri.ToString ();
		this.Weight = 1;
	}

	// Marks the queryable as Indexing, offers the indexable to
	// PreAddIndexableHook and, if accepted, queues it.  Immediate
	// priority tasks flush right away; all others flush only when
	// ConditionalFlush decides to.
	override protected void DoTaskReal ()
	{
		QueryableState old_state = queryable.State;
		queryable.State = QueryableState.Indexing;

		// Restore the previous state even when a hook, the add or
		// the flush throws, the same way Flush and ConditionalFlush do.
		try {
			if (queryable.PreAddIndexableHook (indexable)) {
				queryable.AddIndexable (indexable);

				if (Priority == Scheduler.Priority.Immediate)
					queryable.Flush ();
				else
					queryable.ConditionalFlush ();
			}
		} finally {
			queryable.State = old_state;
		}
	}

	override protected void DoCleanup ()
	{
		indexable.Cleanup ();
	}
}
557 public Scheduler.Task NewAddTask (Indexable indexable)
559 AddTask task;
560 task = new AddTask (this, indexable);
561 task.Source = this;
562 return task;
565 //////////////////////////////////////////////////////////////////////////////////
567 // Adding an indexable generator
// Scheduler task that drains an IIndexableGenerator in batches: each
// run adds indexables until a flush goes through, then asks to be
// rescheduled as long as the generator reports more work.
private class AddGeneratorTask : Scheduler.Task {
	LuceneQueryable queryable;
	IIndexableGenerator generator;

	// queryable -- the backend the indexables are added through.
	// generator -- source of indexables; its StatusName becomes the task tag.
	public AddGeneratorTask (LuceneQueryable queryable,
				 IIndexableGenerator generator)
	{
		this.queryable = queryable;
		this.generator = generator;
		this.Tag = generator.StatusName;
	}

	override protected void DoTaskReal ()
	{
		// Since this is a generator, we want the task to
		// get re-scheduled after it is run.
		Reschedule = true;

		QueryableState old_state = queryable.State;
		queryable.State = QueryableState.Indexing;

		// Restore the previous state even when the generator, a
		// hook or a flush throws, the same way Flush and
		// ConditionalFlush do.
		try {
			// Number of times a null indexable was returned.  We don't want
			// to spin tightly in a loop here if we're not actually indexing
			// things.
			int misfires = 0;

			do {
				if (! generator.HasNextIndexable ()) {
					// Of course, don't reschedule if there is no more work to do.
					Reschedule = false;
					break;
				}

				Indexable generated;
				generated = generator.GetNextIndexable ();

				// Note that the indexable generator can return null.
				// This means that the generator didn't have an indexable
				// to return this time through, but it does not mean that
				// its processing queue is empty.
				if (generated == null) {
					misfires++;

					if (misfires > 179) // Another totally arbitrary number
						break;
					else
						continue;
				}

				if (queryable.PreAddIndexableHook (generated))
					queryable.AddIndexable (generated);
				else
					generated.Cleanup ();

				// We keep adding indexables until a flush goes through.
			} while (! queryable.ConditionalFlush ());

			generator.PostFlushHook ();
		} finally {
			queryable.State = old_state;
		}
	}

	override protected void DoCleanup ()
	{
	}
}
636 public Scheduler.Task NewAddTask (IIndexableGenerator generator)
638 AddGeneratorTask task;
639 task = new AddGeneratorTask (this, generator);
640 task.Source = this;
641 return task;
644 //////////////////////////////////////////////////////////////////////////////////
646 // There used to be a separate type of task for doing removes.
647 // This is all that remains of that old code.
648 public Scheduler.Task NewRemoveTask (Uri uri)
650 Indexable indexable;
651 indexable = new Indexable (IndexableType.Remove, uri);
653 return NewAddTask (indexable);
656 //////////////////////////////////////////////////////////////////////////////////
658 public Scheduler.Task NewRemoveByPropertyTask (Property prop)
660 PropertyRemovalGenerator prg = new PropertyRemovalGenerator (driver, prop);
662 return NewAddTask (prg);
665 ///////////////////////////////////////////////////////////////////////////////////
668 // An IIndexableGenerator that returns remove Indexables for
669 // all items which match a certain property
// Generator that yields one Remove indexable per Uri matching a
// property.  The matching Uris are fetched from the driver lazily,
// the first time the generator is asked whether it has work.
private class PropertyRemovalGenerator : IIndexableGenerator {

	private LuceneQueryingDriver driver;
	private Property prop_to_match;
	private Uri[] uris_to_remove;
	private int idx;

	// driver -- used to look up the Uris matching prop.
	// prop   -- the property (key and value) to match.
	public PropertyRemovalGenerator (LuceneQueryingDriver driver, Property prop)
	{
		this.driver = driver;
		this.prop_to_match = prop;
	}

	// Returns the next Remove indexable, or null when there is
	// nothing (left) to remove.  Going through HasNextIndexable
	// guarantees the Uri list has been loaded and that idx is in
	// range, so calling this out of order cannot dereference a
	// null or exhausted array.
	public Indexable GetNextIndexable ()
	{
		if (! HasNextIndexable ())
			return null;

		Indexable indexable;

		indexable = new Indexable (IndexableType.Remove, uris_to_remove [idx]);
		idx++;

		return indexable;
	}

	// Loads the matching Uris on first use and reports whether
	// any are still waiting to be handed out.
	public bool HasNextIndexable ()
	{
		if (uris_to_remove == null)
			uris_to_remove = this.driver.PropertyQuery (this.prop_to_match);

		// NOTE(review): whether PropertyQuery can return null is not
		// visible here; a null result is treated as "nothing to remove".
		return uris_to_remove != null && idx < uris_to_remove.Length;
	}

	public string StatusName {
		get {
			return String.Format ("Removing {0}={1}", prop_to_match.Key, prop_to_match.Value);
		}
	}

	public void PostFlushHook () { }
}
716 //////////////////////////////////////////////////////////////////////////////////
718 // When all other tasks are complete, we need to do a final flush.
719 // We schedule that as a maintenance task.
721 private class FinalFlushTask : Scheduler.Task {
722 LuceneQueryable queryable;
724 public FinalFlushTask (LuceneQueryable queryable)
726 this.queryable = queryable;
730 override protected void DoTaskReal ()
732 queryable.Flush ();
// Hands the (single, lazily created) final-flush task to the
// scheduler.  The task is created and configured on first use and
// reused on every later call.
private void ScheduleFinalFlush ()
{
	Scheduler.Task flush_task = our_final_flush_task;

	if (flush_task == null) {
		flush_task = new FinalFlushTask (this);
		our_final_flush_task = flush_task;

		flush_task.Tag = "Final Flush for " + IndexName;
		flush_task.Priority = Scheduler.Priority.Maintenance;
		flush_task.SubPriority = 100; // do this first when starting maintenance
		flush_task.Source = this;
	}

	ThisScheduler.Add (flush_task);
}
751 //////////////////////////////////////////////////////////////////////////////////
753 // Optimize the index
755 private DateTime last_optimize_time = DateTime.MinValue;
757 public DateTime LastOptimizeTime {
758 get { return last_optimize_time; }
759 set { last_optimize_time = value; }
762 private class OptimizeTask : Scheduler.Task {
763 LuceneQueryable queryable;
765 public OptimizeTask (LuceneQueryable queryable)
767 this.queryable = queryable;
770 override protected void DoTaskReal ()
772 queryable.Optimize ();
773 queryable.LastOptimizeTime = DateTime.Now;
777 public Scheduler.Task NewOptimizeTask ()
779 Scheduler.Task task;
780 task = new OptimizeTask (this);
781 task.Tag = "Optimize " + IndexName;
782 task.Priority = Scheduler.Priority.Maintenance;
783 task.Source = this;
785 return task;
788 private void OnOptimizeAllEvent ()
790 Scheduler.Task task;
791 task = NewOptimizeTask (); // construct an optimizer task
792 task.Priority = Scheduler.Priority.Delayed; // but boost the priority
793 ThisScheduler.Add (task);
// (Re)schedules the optimize task for this index.
private void ScheduleOptimize ()
{
	// Really we only want to optimize at most once a day, even if
	// we have indexed a ton of data: when the last optimize is more
	// than a day old, wait ten minutes; otherwise wait out the rest
	// of the day.
	TimeSpan since_last = DateTime.Now - last_optimize_time;
	double delay_minutes = (since_last.TotalDays > 1.0)
		? 10.0
		: (new TimeSpan (TimeSpan.TicksPerDay) - since_last).TotalMinutes;

	if (our_optimize_task == null)
		our_optimize_task = NewOptimizeTask ();

	// Both the static switch and the environment variable force an
	// (almost) immediate optimize.
	bool right_away = OptimizeRightAway
		|| Environment.GetEnvironmentVariable ("BEAGLE_UNDER_BLUDGEON") != null;
	if (right_away)
		delay_minutes = 1/120.0; // half a second

	// Changing the trigger time of an already-scheduled process
	// does what you would expect.
	our_optimize_task.TriggerTime = DateTime.Now.AddMinutes (delay_minutes);

	// Adding the same task more than once is a harmless no-op.
	ThisScheduler.Add (our_optimize_task);
}
822 //////////////////////////////////////////////////////////////////////////////////
824 // Other hooks
826 // If this returns true, a task will automatically be created to
827 // add the child.
828 virtual protected bool PreChildAddHook (Indexable child)
830 return true;
833 virtual protected void PreFlushHook (IndexerRequest flushed_request)
836 virtual protected void PostFlushHook (IndexerRequest flushed_request,
837 IndexerReceipt [] receipts)
840 //////////////////////////////////////////////////////////////////////////////////
842 protected void AddIndexable (Indexable indexable)
844 indexable.Source = QueryDriver.GetQueryable (this).Name;
846 lock (request_lock)
847 pending_request.Add (indexable);
849 // Schedule a final flush every time we add anything.
850 // Better safe than sorry.
851 ScheduleFinalFlush ();
854 protected void Optimize ()
856 lock (request_lock) {
857 pending_request.OptimizeIndex = true;
858 Flush ();
// Flushes only when enough requests have piled up.  Returns true if
// a flush was actually performed, false otherwise.  The state is set
// to Flushing for the duration of the call and restored afterwards.
protected bool ConditionalFlush ()
{
	bool did_flush = false;

	QueryableState saved_state = State;
	State = QueryableState.Flushing;

	try {
		lock (request_lock) {
			// 37 is a totally arbitrary magic number
			if (pending_request.Count > 37) {
				Flush ();
				did_flush = true;
			}
		}
	} finally {
		State = saved_state;
	}

	return did_flush;
}
881 protected void Flush ()
883 QueryableState old_state = State;
884 State = QueryableState.Flushing;
886 try {
887 DoFlush ();
888 } finally {
889 State = old_state;
// Sends everything queued in pending_request to the indexer and
// processes the receipts that come back: runs the per-receipt hooks,
// schedules tasks for child indexables, tracks parents that are still
// waiting for children, and finally notifies open queries about the
// added and removed Uris.
private void DoFlush ()
{
	IndexerRequest flushed_request;

	lock (request_lock) {
		if (pending_request.IsEmpty)
			return;

		flushed_request = pending_request;
		pending_request = new IndexerRequest ();

		// We hold the request_lock when calling PreFlushHook, so
		// that no other requests can come in until it exits.
		PreFlushHook (flushed_request);
	}

	IndexerReceipt [] receipts;
	receipts = indexer.Flush (flushed_request);

	PostFlushHook (flushed_request, receipts);

	// Silently return if we get a null back.  This is probably
	// a bad thing to do.
	if (receipts == null)
		return;

	// Nothing happened (except maybe an optimize, which does not
	// generate a receipt).  Also do nothing.
	if (receipts.Length == 0)
		return;

	// Update the cached count of items in the driver
	driver.SetItemCount (indexer.GetItemCount ());

	// Something happened, so schedule an optimize just in case.
	ScheduleOptimize ();

	ArrayList added_uris = new ArrayList ();
	ArrayList removed_uris = new ArrayList ();

	// Capture the store once: fa_store is created lazily by the
	// FileAttributesStore property, possibly from inside a hook, and
	// the commit below must pair with exactly the begin done here.
	FileAttributesStore transaction_store = fa_store;
	if (transaction_store != null)
		transaction_store.BeginTransaction ();

	try {
		ProcessFlushReceipts (flushed_request, receipts, added_uris, removed_uris);
	} finally {
		// Never leave the transaction open, even when receipt
		// processing throws.
		if (transaction_store != null)
			transaction_store.CommitTransaction ();
	}

	// Propagate the change notification to any open queries.
	if (added_uris.Count > 0 || removed_uris.Count > 0) {
		ChangeData change_data;
		change_data = new ChangeData ();
		change_data.AddedUris = added_uris;
		change_data.RemovedUris = removed_uris;

		QueryDriver.QueryableChanged (this, change_data);
	}
}

// Walks the receipts of one flush and fills added_uris / removed_uris
// with the Uris to report to open queries.
private void ProcessFlushReceipts (IndexerRequest flushed_request,
				   IndexerReceipt [] receipts,
				   ArrayList added_uris,
				   ArrayList removed_uris)
{
	// indexable_added_receipt_index [i]: receipts [i] is an added receipt.
	// child_added_receipt_count [i]: number of children of receipts [i]
	// for which an add task was scheduled.
	bool[] indexable_added_receipt_index = new bool [receipts.Length];
	int[] child_added_receipt_count = new int [receipts.Length];

	for (int i = 0; i < receipts.Length; ++i) {
		child_added_receipt_count [i] = 0;
		indexable_added_receipt_index [i] = false;

		if (receipts [i] is IndexerAddedReceipt) {
			// Process IndexerAddedReceipt after knowing if there are any
			// child of the indexable yet to be indexed
			indexable_added_receipt_index [i] = true;
		} else if (receipts [i] is IndexerRemovedReceipt) {

			IndexerRemovedReceipt r;
			r = (IndexerRemovedReceipt) receipts [i];

			// Drop the removed item from the text cache
			TextCache.UserCache.Delete (r.Uri);

			// Call the appropriate hook
			try {
				PostRemoveHook (flushed_request.GetByUri (r.Uri), r);
			} catch (Exception ex) {
				Logger.Log.Warn (ex, "Caught exception in PostRemoveHook '{0}'",
						 r.Uri);
			}

			// Add the removed Uri to the list for our
			// change data.  This will be an external Uri
			// when we are remapping.
			removed_uris.Add (r.Uri);

		} else if (receipts [i] is IndexerChildIndexablesReceipt) {

			IndexerChildIndexablesReceipt r;
			r = (IndexerChildIndexablesReceipt) receipts [i];

			foreach (Indexable child in r.Children) {
				bool please_add_a_new_task = false;

				try {
					please_add_a_new_task = PreChildAddHook (child);
				} catch (InvalidOperationException) {
					// Queryable does not support adding children
				} catch (Exception ex) {
					Logger.Log.Warn (ex, "Caught exception in PreChildAddHook '{0}'", child.DisplayUri);
				}

				if (please_add_a_new_task) {
					Log.Debug ("Adding child {0} to parent {1}", child.Uri, child.ParentUri);
					Scheduler.Task task = NewAddTask (child);
					task.SubPriority = 1;
					ThisScheduler.Add (task);

					// value at 'i' = number of successful children
					child_added_receipt_count [i] ++;
				} else
					child.Cleanup ();
			}
		}
	}

	// Register (or update) the bookkeeping entry of every parent
	// that just had children scheduled.
	for (int i = 0; i < receipts.Length; ++i) {
		if (child_added_receipt_count [i] == 0)
			continue;

		IndexerChildIndexablesReceipt r;
		r = (IndexerChildIndexablesReceipt) receipts [i];

		if (r.Children == null)
			continue;

		// Use first child to get parent uri since all children will share same parent
		Indexable child = (Indexable) r.Children [0];
		ParentIndexableInfo info;

		// NOTE(review): the parent is charged with r.Children.Count
		// children, although only child_added_receipt_count [i] of
		// them actually got an add task; a rejected child never
		// decrements NumChildLeft.  Left unchanged -- confirm intent.
		Uri parent_uri = child.ParentUri;
		info = (ParentIndexableInfo) parent_indexable_table [parent_uri];
		if (info != null) {
			info.NumChildLeft += r.Children.Count;
			if (Debug)
				Log.Debug ("Add {2} children to {0}. (to-index {1})", info.Indexable.Uri, info.NumChildLeft, r.Children.Count);
			continue;
		}

		// Need to figure out the indexeraddedreceipt for r.indexable
		// FIXME: Huge assumption on how LuceneIndexingDriver works
		// Assuming that IndexingDriver sends the addedreceipt for the
		// main indexable and the childreceipts for added children in
		// the same response.
		IndexerAddedReceipt added_receipt = null;
		for (int j = 0; j < receipts.Length; ++j) {
			if (! indexable_added_receipt_index [j])
				continue;

			// Only accept a receipt whose Uri really is the
			// parent's; never fall back to whichever added receipt
			// happened to be inspected last.
			IndexerAddedReceipt candidate = (IndexerAddedReceipt) receipts [j];
			if (candidate.Uri == parent_uri) {
				added_receipt = candidate;
				break;
			}
		}

		// The parent's receipt is not part of this response
		if (added_receipt == null)
			continue;

		Indexable parent_indexable = flushed_request.GetByUri (parent_uri);

		// Without the parent indexable there is nothing to hand
		// to PostChildrenIndexedHook later on.
		if (parent_indexable == null)
			continue;

		info = new ParentIndexableInfo ();
		info.NumChildLeft = r.Children.Count;
		info.LastChildIndexTime = child.Timestamp;
		info.Indexable = parent_indexable;
		info.Receipt = new IndexerAddedReceipt (added_receipt.Uri,
							added_receipt.FilterName,
							added_receipt.FilterVersion);
		parent_indexable_table [info.Indexable.Uri] = info;
		if (Debug)
			Log.Debug ("Add {2} children to {0}. (to-index {1})", info.Indexable.Uri, info.NumChildLeft, r.Children.Count);
	}

	// Process these after knowing what all child indexable receipts were present
	for (int i = 0; i < receipts.Length; ++i) {
		if (! indexable_added_receipt_index [i])
			continue;

		IndexerAddedReceipt r = (IndexerAddedReceipt) receipts [i];
		if (Debug)
			Log.Debug ("AddedReceipt for {0}", r.Uri);

		// Add the Uri to the list for our change data
		// before doing any post-processing.
		// This ensures that we have internal uris when
		// we are remapping.
		added_uris.Add (r.Uri);

		// Call the appropriate hook
		try {
			// Map from internal->external Uris in the PostAddHook
			Indexable submitted_indexable = flushed_request.GetByUri (r.Uri);

			// Copy taken before PostAddHook runs, so it still
			// carries the Uri the receipt arrived with.
			IndexerAddedReceipt receipt_copy = new IndexerAddedReceipt (
				r.Uri,
				r.FilterName,
				r.FilterVersion);
			PostAddHook (submitted_indexable, r);

			ParentIndexableInfo info;
			// Check if this indexable has any children
			info = (ParentIndexableInfo) parent_indexable_table [submitted_indexable.Uri];
			// Indexable has no children
			if (info == null) {
				Uri parent_uri = submitted_indexable.ParentUri;

				if (parent_uri != null) {
					// Indexable is itself a child
					info = (ParentIndexableInfo) parent_indexable_table [parent_uri];
				}

				if (info == null)
					PostChildrenIndexedHook (submitted_indexable, receipt_copy, submitted_indexable.Timestamp);
				else {
					// This indexable is a child registered earlier
					info.NumChildLeft --;
					info.LastChildIndexTime = submitted_indexable.Timestamp;
					if (Debug)
						Log.Debug ("Finished indexing child for {0} ({1} child left)", info.Indexable.Uri, info.NumChildLeft);
				}
			}
		} catch (Exception ex) {
			Logger.Log.Warn (ex, "Caught exception in PostAddHook or PostChildrenIndexedHook '{0}' '{1}' '{2}'",
					 r.Uri, r.FilterName, r.FilterVersion);
		}

		// Every added Uri also needs to be listed as removed,
		// to avoid duplicate hits in the query.  Since the
		// removed Uris need to be external Uris, we add them
		// to the list *after* post-processing.
		removed_uris.Add (r.Uri);
	}

	// Finish every parent whose children have all been indexed.
	// Removal is deferred so the table is not modified while it is
	// being enumerated.
	ArrayList to_remove = new ArrayList ();
	foreach (ParentIndexableInfo info in parent_indexable_table.Values) {
		if (info.NumChildLeft > 0)
			continue;

		if (Debug)
			Log.Debug ("{0} has no more children left, removing", info.Indexable.Uri);

		// Guarded like the other hook calls in this method, so one
		// failing backend hook cannot abort the whole flush and
		// drop the change notification.
		try {
			PostChildrenIndexedHook (info.Indexable, info.Receipt, info.LastChildIndexTime);
		} catch (Exception ex) {
			Logger.Log.Warn (ex, "Caught exception in PostChildrenIndexedHook '{0}'",
					 info.Indexable.Uri);
		}

		to_remove.Add (info.Indexable.Uri);
	}

	foreach (Uri uri in to_remove)
		parent_indexable_table.Remove (uri);
	if (Debug)
		Log.Debug ("parent_indexable_table now contains {0} parent-child", parent_indexable_table.Values.Count);
}
1142 //////////////////////////////////////////////////////////////////////////////////
1145 // It is often convenient to have easy access to a FileAttributesStore
1148 virtual protected IFileAttributesStore BuildFileAttributesStore ()
1150 if (ExtendedAttribute.Supported)
1151 return new FileAttributesStore_ExtendedAttribute (IndexFingerprint);
1152 else
1153 return new FileAttributesStore_Sqlite (IndexDirectory, IndexFingerprint);
1157 public FileAttributesStore FileAttributesStore {
1158 get {
1159 if (fa_store == null)
1160 fa_store = new FileAttributesStore (BuildFileAttributesStore ());
1161 return fa_store;
1165 //////////////////////////////////////////////////////////////////////////////////
1167 virtual protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
1168 int minor_version,
1169 bool read_only_mode)
1171 return new LuceneQueryingDriver (index_name, minor_version, read_only_mode);