Some more fixes wrt child-indexables. Namely, fix proper handling of child indexables...
[beagle.git] / beagled / LuceneQueryable.cs
blob6caedeef29de22c5d2e70e64cea203848db353cb
1 //
2 // LuceneQueryable.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
31 using Beagle.Util;
33 namespace Beagle.Daemon {
35 public abstract class LuceneQueryable : IQueryable {
// When set, this class emits extra Log.Debug output while
// processing indexer receipts.
static public bool Debug = false;

// When set, ScheduleOptimize uses a half-second delay instead
// of its normal delay.
static public bool OptimizeRightAway = false;

// Signature of the factory that supplies an IIndexer for an
// index with the given name and minor version.
public delegate IIndexer IndexerCreator (string name, int minor_version);

// Process-wide indexer factory; consulted by the constructor
// only when LocalIndexerHook returns null.
static private IndexerCreator indexer_hook = null;

// Write-only: installs the process-wide indexer factory.
static public IndexerCreator IndexerHook {
    set { indexer_hook = value; }
}

// Subclasses may override this to supply their own indexer.
// The default returns null, which makes the constructor fall
// back to the static IndexerHook.
virtual protected IIndexer LocalIndexerHook ()
{
    return null;
}
53 //////////////////////////////////////////////////////////
// Handler signature for the "optimize every index" broadcast.
public delegate void OptimizeAllHandler ();

// Each non-read-only LuceneQueryable subscribes to this in its
// constructor.
static private OptimizeAllHandler OptimizeAllEvent;

// Asks every subscribed queryable to schedule an optimize.
// Does nothing when there are no subscribers.
static public void OptimizeAll ()
{
    if (OptimizeAllEvent == null)
        return;

    OptimizeAllEvent ();
}
65 //////////////////////////////////////////////////////////
// Scheduler that runs our add/flush/optimize tasks.
private Scheduler scheduler = Scheduler.Global;

// Created lazily by the FileAttributesStore property.
private FileAttributesStore fa_store = null;

// Values handed to the constructor.
private string index_name;
private int minor_version;
private bool read_only_mode;

// The driver answers queries; the indexer applies changes.
// The indexer stays null in read-only mode.
private LuceneQueryingDriver driver;
private IIndexer indexer = null;

// Delegates wrapping HitIsValid and HitFilter, plus the
// singleton maintenance tasks that get re-used when scheduled.
private LuceneQueryingDriver.UriFilter our_uri_filter;
private LuceneCommon.HitFilter our_hit_filter;
private Scheduler.Task our_final_flush_task = null;
private Scheduler.Task our_optimize_task = null;

// request_lock guards pending_request, the batch of indexables
// accumulated since the last flush.
private object request_lock = new object ();
private IndexerRequest pending_request = new IndexerRequest ();
85 //////////////////////////////////////////////////////////
// Convenience constructors: a minor version of -1 and
// read_only_mode == false are the defaults.
public LuceneQueryable (string index_name) : this (index_name, -1, false) { }

public LuceneQueryable (string index_name, bool read_only_mode) : this (index_name, -1, read_only_mode) { }

public LuceneQueryable (string index_name, int minor_version) : this (index_name, minor_version, false) { }

// Builds the querying driver and the two filter delegates.
// Unless the queryable is read-only, it also obtains an
// indexer, subscribes to the optimize-all and shutdown events,
// and schedules an initial optimize.
public LuceneQueryable (string index_name, int minor_version, bool read_only_mode)
{
    this.index_name = index_name;
    this.minor_version = minor_version;
    this.read_only_mode = read_only_mode;

    driver = BuildLuceneQueryingDriver (this.index_name, this.minor_version, this.read_only_mode);
    our_uri_filter = new LuceneQueryingDriver.UriFilter (this.HitIsValid);
    our_hit_filter = new LuceneCommon.HitFilter (this.HitFilter);

    // If the queryable is in read-only mode, don't
    // instantiate an indexer for it.
    if (! read_only_mode) {
        // A subclass-provided indexer wins over the static hook.
        indexer = LocalIndexerHook ();
        if (indexer == null && indexer_hook != null)
            indexer = indexer_hook (this.index_name, this.minor_version);

        OptimizeAllEvent += OnOptimizeAllEvent;

        // Schedule an optimize, just in case
        ScheduleOptimize ();

        Shutdown.ShutdownEvent += new Shutdown.ShutdownHandler (OnShutdownEvent);
    }
}
// Name of the index, as passed to the constructor.
protected string IndexName {
    get { return index_name; }
}

// Forwarded from the querying driver's TopDirectory.
protected string IndexDirectory {
    get { return driver.TopDirectory; }
}

// Forwarded from the querying driver's Fingerprint.
protected string IndexFingerprint {
    get { return driver.Fingerprint; }
}

// The querying driver built in the constructor.
protected LuceneQueryingDriver Driver {
    get { return driver; }
}

// The scheduler our tasks are added to.
public Scheduler ThisScheduler {
    get { return scheduler; }
}
140 /////////////////////////////////////////
// Start-up hook for backends; the base implementation is empty.
virtual public void Start ()
{
}
147 /////////////////////////////////////////
// Shutdown hook for backends; the base implementation is empty.
virtual protected void ShutdownHook ()
{
}

// Subscribed to Shutdown.ShutdownEvent by the constructor.
// Cleans up whatever is still waiting in the pending request,
// then runs the backend's ShutdownHook. An exception from the
// hook is logged and not propagated.
private void OnShutdownEvent ()
{
    lock (request_lock) {
        pending_request.Cleanup ();
    }

    try {
        ShutdownHook ();
    } catch (Exception ex) {
        Logger.Log.Warn (ex, "Caught exception in shutdown hook");
    }
}
166 /////////////////////////////////////////
// Backends may override this to decline a query.
// The default accepts every query.
virtual public bool AcceptQuery (Query query)
{
    return true;
}

/////////////////////////////////////////

// Per-Uri filter, wrapped in our_uri_filter by the constructor.
// The default accepts every Uri.
virtual protected bool HitIsValid (Uri uri)
{
    return true;
}

// Per-hit filter, wrapped in our_hit_filter by the constructor.
// The default accepts every hit.
virtual protected bool HitFilter (Hit hit)
{
    return true;
}
186 /////////////////////////////////////////
// DEPRECATED: relevancy is now entirely time-based, so this
// multiplier has no effect. The default is the neutral 1.0.
virtual protected double RelevancyMultiplier (Hit hit)
{
    return 1.0;
}
// Returns 0.5 raised to (age / half-life), where age is the
// absolute distance in days between dt and DateTime.Now.
// Because the distance is absolute, a timestamp in the future
// decays exactly like one equally far in the past, and the
// result is 1.0 only when dt is "now".
//
// The previous "days < 0" early-out could never trigger after
// Math.Abs and has been removed; results are unchanged.
//
// half_life_days is assumed to be positive. The value is not
// validated here, so zero or negative values yield whatever
// Math.Pow produces for the resulting exponent.
static protected double HalfLifeMultiplier (DateTime dt, int half_life_days)
{
    double days = Math.Abs ((DateTime.Now - dt).TotalDays);
    return Math.Pow (0.5, days / (double) half_life_days);
}

// FIXME: A decaying half-life is a little sketchy, since data
// will eventually decay beyond the epsilon and be dropped
// from the results entirely, which is almost never what we
// want, particularly in searches with only a few
// results.  But with a default half-life of 6 months, it'll
// take over 13 years to fully decay outside the epsilon on
// this multiplier alone.
static protected double HalfLifeMultiplier (DateTime time)
{
    // Default relevancy half-life is six months.
    return HalfLifeMultiplier (time, 182);
}
// Computes the half-life multiplier for each listed property
// that the hit actually carries and returns the largest one
// (i.e. the one for the timestamp closest to now). Each
// property value is parsed with StringFu.StringToDateTime.
//
// hit:                the hit whose properties are inspected.
// default_multiplier: returned when none of the properties
//                     yields a value.
// properties:         property keys; entries that are not
//                     strings are skipped rather than being
//                     passed on to the hit as a null key.
static protected double HalfLifeMultiplierFromProperty (Hit hit,
                                                        double default_multiplier,
                                                        params object [] properties)
{
    double best_m = -1.0;

    // An explicit null passed for the params array means
    // "no properties".
    if (properties != null) {
        foreach (object obj in properties) {
            string key = obj as string;
            if (key == null)
                continue;

            string val = hit [key];
            if (val == null)
                continue;

            DateTime dt = StringFu.StringToDateTime (val);

            // Uses the default six-month (182 day) half-life.
            double this_m = HalfLifeMultiplier (dt);
            if (this_m > best_m)
                best_m = this_m;
        }
    }

    // Multipliers are never negative, so -1.0 still means
    // "nothing found".
    if (best_m < 0)
        best_m = default_multiplier;
    return best_m;
}
239 /////////////////////////////////////////
241 // *** FIXME *** FIXME *** FIXME *** FIXME ***
242 // When we rename a directory, we need to somehow
243 // propagate change information to files under that
244 // directory. Example: say that file foo is in
245 // directory bar, and there is an open query that
246 // matches foo. The tile probably says something
247 // like "foo, in folder bar".
248 // Then assume I rename bar to baz. That notification
249 // will go out, so a query matching bar will get
250 // updated... but the query matching foo will not.
251 // What should really happen is that the tile
252 // should change to say "foo, in folder baz".
253 // But making that work will require some hacking
254 // on the QueryResults.
255 // *** FIXME *** FIXME *** FIXME *** FIXME ***
// The change notification Flush hands to
// QueryDriver.QueryableChanged, and which comes back to us
// through DoQuery.
private class ChangeData : IQueryableChangeData {

    // Fed back to LuceneQueryingDriver.DoQuery as a search
    // subset, so these must be internal Uris when we are
    // remapping.
    public ICollection AddedUris;

    // Reported directly to clients in Subtract events, so
    // these must be external Uris when we are remapping.
    public ICollection RemovedUris;
}
// Runs a query against this backend.
//
// With no change data this is an initial query: index
// listeners get nothing, everything else is handed to the
// driver unrestricted. With change data (which must be our
// own ChangeData), removed Uris are subtracted first. Then
// either synthetic hits are built for an index listener, or
// the driver re-runs the query restricted to the added Uris.
public void DoQuery (Query query,
                     IQueryResult query_result,
                     IQueryableChangeData i_change_data)
{
    ChangeData changes = (ChangeData) i_change_data;
    ICollection search_subset = null;

    if (changes == null) {
        // Index listeners never return any initial matches.
        if (query.IsIndexListener)
            return;
    } else {
        if (changes.RemovedUris != null)
            query_result.Subtract (changes.RemovedUris);

        // If nothing was added, we are done: this change cannot
        // affect an outstanding live query any further.
        search_subset = changes.AddedUris;
        if (search_subset == null || search_subset.Count == 0)
            return;

        // An index listener does not need a real query: every
        // added Uri that survives both filters becomes a
        // synthetic hit.
        if (query.IsIndexListener) {
            ArrayList synthetic_hits = new ArrayList ();

            foreach (Uri uri in search_subset) {
                // A filter that throws counts as a rejection.
                bool uri_ok = true;
                if (our_uri_filter != null) {
                    uri_ok = false;
                    try {
                        uri_ok = our_uri_filter (uri);
                    } catch (Exception e) {
                        Log.Warn (e, "Caught an exception in HitIsValid for {0}", uri);
                    }
                }
                if (! uri_ok)
                    continue;

                Hit hit = new Hit ();
                hit.Uri = uri;

                bool hit_ok = true;
                if (our_hit_filter != null) {
                    hit_ok = false;
                    try {
                        hit_ok = our_hit_filter (hit);
                    } catch (Exception e) {
                        Log.Warn (e, "Caught an exception in HitFilter for {0}", hit.Uri);
                    }
                }
                if (! hit_ok)
                    continue;

                synthetic_hits.Add (hit);
            }

            if (synthetic_hits.Count > 0)
                query_result.Add (synthetic_hits);
            return;
        }
    }

    Driver.DoQuery (query,
                    query_result,
                    search_subset,
                    our_uri_filter,
                    our_hit_filter);
}
344 /////////////////////////////////////////
// Looks the Uri up in the user text cache and, if cached text
// exists, generates a snippet from it for the given query
// terms. Returns null when the cache has no reader for the Uri.
//
// The reader is now closed even when snippet generation
// throws; previously it leaked in that case.
protected string GetSnippetFromTextCache (string [] query_terms, Uri uri)
{
    TextReader reader = TextCache.UserCache.GetReader (uri);
    if (reader == null)
        return null;

    try {
        return SnippetFu.GetSnippet (query_terms, reader);
    } finally {
        reader.Close ();
    }
}
// Produces a snippet for a hit from the text cache.
// Backends that remap Uris should override this with
//   return GetSnippetFromTextCache (query_terms, remapping_fn (hit.Uri))
virtual public string GetSnippet (string [] query_terms, Hit hit)
{
    return GetSnippetFromTextCache (query_terms, hit.Uri);
}
369 /////////////////////////////////////////
private DateTime last_state_change = DateTime.MinValue;

// Builds a status snapshot from ProgressPercent, the item
// count and IsIndexing.
public QueryableStatus GetQueryableStatus ()
{
    QueryableStatus status = new QueryableStatus ();

    status.ProgressPercent = this.ProgressPercent;

    // Without an indexer (read-only mode) the driver is the
    // only source for the item count.
    if (indexer != null)
        status.ItemCount = indexer.GetItemCount ();
    else
        status.ItemCount = driver.GetItemCount ();

    status.IsIndexing = this.IsIndexing;

    return status;
}
// Reported through GetQueryableStatus; false by default.
protected virtual bool IsIndexing {
    get { return false; }
}

// Reported through GetQueryableStatus; -1 by default.
protected virtual int ProgressPercent {
    get { return -1; }
}
399 /////////////////////////////////////////
// Opens the named data file, located under
// PathFinder.IndexDir/<IndexName>/, for reading.
// Returns null when the file does not exist; the caller owns
// (and must close) the returned stream.
public FileStream ReadDataStream (string name)
{
    string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);

    if (!File.Exists (path))
        return null;

    try {
        return new FileStream (path, System.IO.FileMode.Open, FileAccess.Read);
    } catch (FileNotFoundException) {
        // The file vanished between the existence check and
        // the open; treat that like any other missing file.
        return null;
    } catch (DirectoryNotFoundException) {
        return null;
    }
}

// Returns the first line of the named data file, or null when
// the file does not exist (or is empty, since ReadLine returns
// null at end of stream).
public string ReadDataLine (string name)
{
    FileStream stream = ReadDataStream (name);

    if (stream == null)
        return null;

    // Disposing the reader also closes the underlying stream,
    // and now happens even if ReadLine throws.
    using (StreamReader reader = new StreamReader (stream)) {
        return reader.ReadLine ();
    }
}
// Creates (or truncates) the named data file under
// PathFinder.IndexDir/<IndexName>/ and returns it opened for
// writing. The caller owns (and must close) the stream.
public FileStream WriteDataStream (string name)
{
    string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);

    return new FileStream (path, System.IO.FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
}

// Replaces the contents of the named data file with a single
// line. Passing a null line deletes the file instead (a no-op
// if it does not exist).
public void WriteDataLine (string name, string line)
{
    if (line == null) {
        string path = Path.Combine (Path.Combine (PathFinder.IndexDir, this.IndexName), name);

        if (File.Exists (path))
            File.Delete (path);

        return;
    }

    // Both disposables are released even if the write throws;
    // previously an exception left the file handle open.
    using (FileStream stream = WriteDataStream (name))
    using (StreamWriter writer = new StreamWriter (stream)) {
        writer.WriteLine (line);
    }
}
// Bookkeeping for an indexable whose children are still being
// indexed: everything needed to write the parent's attributes
// once its last child is done.
private class ParentIndexableInfo {
    // The top-level (parent) indexable.
    public Indexable Indexable;
    // Copy of the added-receipt received for the parent.
    public IndexerAddedReceipt Receipt;
    // Timestamp of the most recently indexed child.
    public DateTime LastChildIndexTime;
    // Number of children still waiting to be indexed.
    public int NumChildLeft;
}

// Parent Uri -> ParentIndexableInfo.
private Hashtable parent_indexable_table = UriFu.NewHashtable ();
460 //////////////////////////////////////////////////////////////////////////////////
462 // More hooks. These are mostly here for the file system backend.
// Called before an indexable is queued; returning false
// rejects it. By default, we like everything.
virtual protected bool PreAddIndexableHook (Indexable indexable)
{
    return true;
}

// If we are remapping Uris, indexables should be added to the
// index with the internal Uri attached.  Thus the receipt
// will come back with an internal Uri.  In order for change
// notification to work correctly, we have to map it to
// an external Uri.
// The default implementation does nothing.
virtual protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
}

// Informs backends that the indexable is completely indexed,
// including all of its children.  Receives the top-level
// parent indexable, the added-receipt for that indexable, and
// the time at which the receipt for the last child was
// received.
// The default implementation does nothing.
virtual protected void PostChildrenIndexedHook (Indexable indexable, IndexerAddedReceipt receipt, DateTime Mtime)
{
}

// Called for every removed-receipt.
// The default implementation does nothing.
virtual protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
}
493 //////////////////////////////////////////////////////////////////////////////////
495 // Adding a single indexable
// Scheduler task that queues one indexable on its queryable.
private class AddTask : Scheduler.Task {
    LuceneQueryable queryable;
    Indexable indexable;

    public AddTask (LuceneQueryable queryable,
                    Indexable indexable)
    {
        this.queryable = queryable;
        this.indexable = indexable;
        this.Tag = indexable.DisplayUri.ToString ();
        this.Weight = 1;
    }

    // Queues the indexable if the backend's pre-add hook
    // accepts it. Immediate-priority tasks force a flush right
    // away; everything else flushes only once enough requests
    // have piled up.
    override protected void DoTaskReal ()
    {
        if (! queryable.PreAddIndexableHook (indexable))
            return;

        queryable.AddIndexable (indexable);

        bool flush_now = (Priority == Scheduler.Priority.Immediate);
        if (flush_now)
            queryable.Flush ();
        else
            queryable.ConditionalFlush ();
    }

    override protected void DoCleanup ()
    {
        indexable.Cleanup ();
    }
}
// Creates (but does not schedule) a task that adds a single
// indexable, with this queryable as its source.
public Scheduler.Task NewAddTask (Indexable indexable)
{
    AddTask add_task = new AddTask (this, indexable);
    add_task.Source = this;
    return add_task;
}
536 //////////////////////////////////////////////////////////////////////////////////
538 // Adding an indexable generator
// Scheduler task that drains an IIndexableGenerator a batch at
// a time, re-scheduling itself until the generator runs dry.
private class AddGeneratorTask : Scheduler.Task {
    LuceneQueryable queryable;
    IIndexableGenerator generator;

    public AddGeneratorTask (LuceneQueryable queryable,
                             IIndexableGenerator generator)
    {
        this.queryable = queryable;
        this.generator = generator;
        this.Tag = generator.StatusName;
    }

    // Pulls indexables from the generator until a flush goes
    // through, the generator is exhausted, or it has returned
    // null too many times in a row. The generator's
    // PostFlushHook is called on every exit path.
    override protected void DoTaskReal ()
    {
        // Since this is a generator, we want the task to
        // get re-scheduled after it is run.
        Reschedule = true;

        // Number of times a null indexable was returned.  We
        // don't want to spin tightly in a loop here if we're
        // not actually indexing things.
        int misfires = 0;

        for (;;) {
            if (! generator.HasNextIndexable ()) {
                // Of course, don't reschedule if there is no more work to do.
                Reschedule = false;
                break;
            }

            Indexable generated = generator.GetNextIndexable ();

            if (generated == null) {
                // A null does not mean the generator is empty,
                // only that it had nothing for us this time.
                misfires++;
                if (misfires > 179) // Another totally arbitrary number
                    break;
            } else if (queryable.PreAddIndexableHook (generated)) {
                queryable.AddIndexable (generated);
            } else {
                generated.Cleanup ();
            }

            // We keep adding indexables until a flush goes through.
            if (queryable.ConditionalFlush ())
                break;
        }

        generator.PostFlushHook ();
    }

    // Nothing to clean up for generator tasks.
    override protected void DoCleanup ()
    {
    }
}
// Creates (but does not schedule) a task that drains the given
// generator, with this queryable as its source.
public Scheduler.Task NewAddTask (IIndexableGenerator generator)
{
    AddGeneratorTask generator_task = new AddGeneratorTask (this, generator);
    generator_task.Source = this;
    return generator_task;
}
610 //////////////////////////////////////////////////////////////////////////////////
// There used to be a separate type of task for doing removes.
// This is all that remains of that old code: a removal is just
// an add task carrying a Remove-type indexable.
public Scheduler.Task NewRemoveTask (Uri uri)
{
    return NewAddTask (new Indexable (IndexableType.Remove, uri));
}

//////////////////////////////////////////////////////////////////////////////////

// Creates a task that removes every item matching the given
// property, by way of a PropertyRemovalGenerator.
public Scheduler.Task NewRemoveByPropertyTask (Property prop)
{
    PropertyRemovalGenerator removal_generator;
    removal_generator = new PropertyRemovalGenerator (driver, prop);

    return NewAddTask (removal_generator);
}
631 ///////////////////////////////////////////////////////////////////////////////////
634 // An IIndexableGenerator that returns remove Indexables for
635 // all items which match a certain property
// An IIndexableGenerator that yields one Remove indexable for
// every Uri the driver reports as matching a given property.
// The property query runs lazily, the first time either
// HasNextIndexable or GetNextIndexable is called.
private class PropertyRemovalGenerator : IIndexableGenerator {

    private LuceneQueryingDriver driver;
    private Property prop_to_match;
    private Uri[] uris_to_remove;
    private int idx;

    public PropertyRemovalGenerator (LuceneQueryingDriver driver, Property prop)
    {
        this.driver = driver;
        this.prop_to_match = prop;
    }

    // Returns the next Remove indexable, or null when nothing
    // is left. Previously this dereferenced the Uri array even
    // when HasNextIndexable had never been called (array still
    // null) or when the array was already exhausted.
    public Indexable GetNextIndexable ()
    {
        if (! HasNextIndexable ())
            return null;

        Indexable indexable;
        indexable = new Indexable (IndexableType.Remove, uris_to_remove [idx]);
        idx++;

        return indexable;
    }

    // True while there are matching Uris left to hand out.
    public bool HasNextIndexable ()
    {
        if (uris_to_remove == null)
            uris_to_remove = this.driver.PropertyQuery (this.prop_to_match);

        // NOTE(review): a null result from PropertyQuery is
        // treated as "no matches" here -- confirm that the
        // driver can actually return null.
        return uris_to_remove != null && idx < uris_to_remove.Length;
    }

    public string StatusName {
        get {
            return String.Format ("Removing {0}={1}", prop_to_match.Key, prop_to_match.Value);
        }
    }

    public void PostFlushHook () { }
}
682 //////////////////////////////////////////////////////////////////////////////////
684 // When all other tasks are complete, we need to do a final flush.
685 // We schedule that as a maintenance task.
// Maintenance task that simply flushes its queryable.
private class FinalFlushTask : Scheduler.Task {
    LuceneQueryable queryable;

    public FinalFlushTask (LuceneQueryable queryable)
    {
        this.queryable = queryable;
    }

    override protected void DoTaskReal ()
    {
        queryable.Flush ();
    }
}

// Makes sure our single final-flush task exists and hands it
// to the scheduler. The task object is created once and
// re-used on every call.
private void ScheduleFinalFlush ()
{
    if (our_final_flush_task == null) {
        Scheduler.Task flush_task = new FinalFlushTask (this);

        flush_task.Tag = "Final Flush for " + IndexName;
        flush_task.Priority = Scheduler.Priority.Maintenance;
        flush_task.SubPriority = 100; // do this first when starting maintenance
        flush_task.Source = this;

        our_final_flush_task = flush_task;
    }

    ThisScheduler.Add (our_final_flush_task);
}
717 //////////////////////////////////////////////////////////////////////////////////
719 // Optimize the index
// When the index was last optimized; DateTime.MinValue until
// an OptimizeTask has run (or a caller sets it).
private DateTime last_optimize_time = DateTime.MinValue;

public DateTime LastOptimizeTime {
    get { return last_optimize_time; }
    set { last_optimize_time = value; }
}

// Maintenance task that optimizes its queryable's index and
// records the completion time.
private class OptimizeTask : Scheduler.Task {
    LuceneQueryable queryable;

    public OptimizeTask (LuceneQueryable queryable)
    {
        this.queryable = queryable;
    }

    override protected void DoTaskReal ()
    {
        queryable.Optimize ();
        queryable.LastOptimizeTime = DateTime.Now;
    }
}

// Creates (but does not schedule) a maintenance-priority
// optimize task for this index.
public Scheduler.Task NewOptimizeTask ()
{
    Scheduler.Task optimize_task = new OptimizeTask (this);

    optimize_task.Tag = "Optimize " + IndexName;
    optimize_task.Priority = Scheduler.Priority.Maintenance;
    optimize_task.Source = this;

    return optimize_task;
}

// Handler for the static OptimizeAll broadcast: schedules a
// fresh optimize task at Delayed rather than Maintenance
// priority.
private void OnOptimizeAllEvent ()
{
    Scheduler.Task boosted = NewOptimizeTask (); // construct an optimizer task
    boosted.Priority = Scheduler.Priority.Delayed; // but boost the priority
    ThisScheduler.Add (boosted);
}
// (Re)schedules our single optimize task.
//
// Normally the task fires ten minutes from now if the last
// optimize was more than a day ago, and otherwise one day
// after the last optimize. With OptimizeRightAway set, or the
// BEAGLE_UNDER_BLUDGEON environment variable present, it
// fires in half a second instead.
private void ScheduleOptimize ()
{
    if (our_optimize_task == null)
        our_optimize_task = NewOptimizeTask ();

    double delay_minutes;

    if (OptimizeRightAway || Environment.GetEnvironmentVariable ("BEAGLE_UNDER_BLUDGEON") != null) {
        delay_minutes = 1/120.0; // half a second
    } else {
        // Really we only want to optimize at most once a day,
        // even if we have indexed a ton of data.
        TimeSpan since_last = DateTime.Now - last_optimize_time;
        if (since_last.TotalDays > 1.0)
            delay_minutes = 10.0;
        else
            delay_minutes = (new TimeSpan (TimeSpan.TicksPerDay) - since_last).TotalMinutes;
    }

    // Changing the trigger time of an already-scheduled process
    // does what you would expect.
    our_optimize_task.TriggerTime = DateTime.Now.AddMinutes (delay_minutes);

    // Adding the same task more than once is a harmless no-op.
    ThisScheduler.Add (our_optimize_task);
}
788 //////////////////////////////////////////////////////////////////////////////////
790 // Other hooks
// Called for every child indexable reported by the indexer.
// If this returns true, a task will automatically be created
// to add the child. The default accepts every child.
virtual protected bool PreChildAddHook (Indexable child)
{
    return true;
}

// Called by Flush, with request_lock held, just before the
// request is handed to the indexer. Does nothing by default.
virtual protected void PreFlushHook (IndexerRequest flushed_request)
{
}

// Called by Flush right after the indexer returns, before the
// receipts are processed. Does nothing by default.
virtual protected void PostFlushHook (IndexerRequest flushed_request,
                                      IndexerReceipt [] receipts)
{
}
806 //////////////////////////////////////////////////////////////////////////////////
// Stamps the indexable with this backend's name, queues it on
// the pending request, and makes sure a final flush is
// scheduled.
protected void AddIndexable (Indexable indexable)
{
    indexable.Source = QueryDriver.GetQueryable (this).Name;

    lock (request_lock) {
        pending_request.Add (indexable);
    }

    // Schedule a final flush every time we add anything.
    // Better safe than sorry.
    ScheduleFinalFlush ();
}

// Marks the pending request as an optimize request and flushes
// it immediately.
protected void Optimize ()
{
    lock (request_lock) {
        pending_request.OptimizeIndex = true;
        Flush ();
    }
}

// Flushes only when more than 37 requests are pending.
// Returns true if we actually did flush, false otherwise.
protected bool ConditionalFlush ()
{
    lock (request_lock) {
        bool enough_pending = pending_request.Count > 37; // a total arbitrary magic number
        if (! enough_pending)
            return false;

        Flush ();
        return true;
    }
}
// Hands the pending request to the indexer and processes the
// receipts that come back:
//
//   1. removed-receipts and child-indexable receipts are
//      handled immediately (children get their own add tasks);
//   2. parents with newly scheduled children are registered in
//      parent_indexable_table;
//   3. added-receipts are handled, which may count a child off
//      against its parent;
//   4. parents with no children left get
//      PostChildrenIndexedHook and are dropped from the table;
//   5. open queries are notified of the added/removed Uris.
//
// Changes from the previous version:
//   * the search for a parent's added-receipt no longer leaves
//     the last non-matching receipt in place when nothing
//     matches (the "just being cautious" null check could
//     never fire);
//   * a parent that cannot be found in the flushed request is
//     skipped instead of causing a NullReferenceException;
//   * the attribute-store transaction is committed on the same
//     store it was begun on, and also when receipt processing
//     throws.
protected void Flush ()
{
    IndexerRequest flushed_request;

    lock (request_lock) {
        if (pending_request.IsEmpty)
            return;

        flushed_request = pending_request;
        pending_request = new IndexerRequest ();

        // We hold the request_lock when calling PreFlushHook, so
        // that no other requests can come in until it exits.
        PreFlushHook (flushed_request);
    }

    IndexerReceipt [] receipts;
    receipts = indexer.Flush (flushed_request);

    PostFlushHook (flushed_request, receipts);

    // Silently return if we get a null back.  This is probably
    // a bad thing to do.
    if (receipts == null)
        return;

    // Nothing happened (except maybe an optimize, which does not
    // generate a receipt).  Also do nothing.
    if (receipts.Length == 0)
        return;

    // Update the cached count of items in the driver
    driver.SetItemCount (indexer.GetItemCount ());

    // Something happened, so schedule an optimize just in case.
    ScheduleOptimize ();

    // Remember which store (if any) we opened the transaction
    // on: fa_store is created lazily and could come into
    // existence while the hooks below run.
    FileAttributesStore transaction_store = fa_store;
    if (transaction_store != null)
        transaction_store.BeginTransaction ();

    ArrayList added_uris = new ArrayList ();
    ArrayList removed_uris = new ArrayList ();

    try {
        bool[] indexable_added_receipt_index = new bool [receipts.Length];
        int[] child_added_receipt_count = new int [receipts.Length];

        for (int i = 0; i < receipts.Length; ++i) {
            child_added_receipt_count [i] = 0;
            indexable_added_receipt_index [i] = false;

            if (receipts [i] is IndexerAddedReceipt) {

                // Process IndexerAddedReceipt after knowing if there are any
                // children of the indexable yet to be indexed
                indexable_added_receipt_index [i] = true;

            } else if (receipts [i] is IndexerRemovedReceipt) {

                IndexerRemovedReceipt r;
                r = (IndexerRemovedReceipt) receipts [i];
                HandleRemoveReceipt (r, flushed_request.GetByUri (r.Uri), removed_uris);

            } else if (receipts [i] is IndexerChildIndexablesReceipt) {

                IndexerChildIndexablesReceipt r;
                r = (IndexerChildIndexablesReceipt) receipts [i];
                HandleChildIndexableReceipt (r, child_added_receipt_count, i);
            }
        }

        // First process the child receipts
        for (int i = 0; i < receipts.Length; ++i) {
            // A non-zero count is only ever recorded for
            // child-indexable receipts.
            if (child_added_receipt_count [i] == 0)
                continue;

            IndexerChildIndexablesReceipt r;
            r = (IndexerChildIndexablesReceipt) receipts [i];

            if (r.Children == null)
                continue;

            // Use first child to get parent uri since all children will share same parent
            Indexable child = (Indexable) r.Children [0];
            ParentIndexableInfo info;

            Uri parent_uri = child.ParentUri;
            info = (ParentIndexableInfo) parent_indexable_table [parent_uri];
            if (info != null) {
                info.NumChildLeft += r.Children.Count;
                if (Debug)
                    Log.Debug ("Add {2} children to {0}. (to-index {1})",
                               info.Indexable.Uri,
                               info.NumChildLeft,
                               r.Children.Count);
                continue;
            }

            // Need to figure out the indexeraddedreceipt for r.indexable
            IndexerAddedReceipt added_receipt = null;

            // FIXME: Huge assumption on how LuceneIndexingDriver works
            // Assuming that IndexingDriver sends the addedreceipt for the
            // main indexable and the childreceipts for added children in
            // the same response.
            // NOTE(review): this compares with Uri's == operator,
            // while parent_indexable_table uses UriFu's hashing;
            // confirm the two agree for Uris with fragments.
            for (int j = 0; j < receipts.Length; ++j) {
                if (! indexable_added_receipt_index [j])
                    continue;

                IndexerAddedReceipt candidate = (IndexerAddedReceipt) receipts [j];
                if (candidate.Uri == parent_uri) {
                    added_receipt = candidate;
                    break;
                }
            }

            // No added-receipt for the parent in this batch:
            // we cannot track its children.
            if (added_receipt == null)
                continue;

            // Likewise if the parent itself was not part of the
            // request we just flushed.
            Indexable parent_indexable = flushed_request.GetByUri (parent_uri);
            if (parent_indexable == null)
                continue;

            // Store the parent-child info for use when child is done indexing
            info = new ParentIndexableInfo ();
            info.NumChildLeft = r.Children.Count;
            info.LastChildIndexTime = child.Timestamp;
            info.Indexable = parent_indexable;
            info.Receipt = new IndexerAddedReceipt (added_receipt.Uri,
                                                    added_receipt.FilterName,
                                                    added_receipt.FilterVersion);

            parent_indexable_table [info.Indexable.Uri] = info;
            if (Debug)
                Log.Debug ("Add {2} children to {0}. (to-index {1})",
                           info.Indexable.Uri,
                           info.NumChildLeft,
                           r.Children.Count);
        }

        // Process these after knowing what all child indexable receipts were present
        for (int i = 0; i < receipts.Length; ++i) {
            if (! indexable_added_receipt_index [i])
                continue;

            IndexerAddedReceipt r = (IndexerAddedReceipt) receipts [i];
            if (Debug)
                Log.Debug ("AddedReceipt for {0}", r.Uri);

            // Add the Uri to the list for our change data
            // before doing any post-processing.
            // This ensures that we have internal uris when
            // we are remapping.
            added_uris.Add (r.Uri);

            // Call the appropriate hook
            try {
                HandleAddReceipt (r, flushed_request.GetByUri (r.Uri));
            } catch (Exception ex) {
                Logger.Log.Warn (ex, "Caught exception in PostAddHook or PostChildrenIndexedHook '{0}' '{1}' '{2}'",
                                 r.Uri, r.FilterName, r.FilterVersion);
            }

            // Every added Uri also needs to be listed as removed,
            // to avoid duplicate hits in the query.  Since the
            // removed Uris need to be external Uris, we add them
            // to the list *after* post-processing.
            removed_uris.Add (r.Uri);
        }

        // Find indexables all of whose children are indexed.
        // Removal is deferred so the table is not modified
        // while it is being enumerated.
        ArrayList to_remove = new ArrayList ();
        foreach (ParentIndexableInfo info in parent_indexable_table.Values) {
            if (info.NumChildLeft > 0)
                continue;

            if (Debug)
                Log.Debug ("{0} has no more children left, removing", info.Indexable.Uri);
            PostChildrenIndexedHook (info.Indexable, info.Receipt, info.LastChildIndexTime);
            to_remove.Add (info.Indexable.Uri);
        }

        foreach (Uri uri in to_remove)
            parent_indexable_table.Remove (uri);
        if (Debug)
            Log.Debug ("parent_indexable_table now contains {0} parent-child",
                       parent_indexable_table.Values.Count);
    } finally {
        if (transaction_store != null)
            transaction_store.CommitTransaction ();
    }

    // Propagate the change notification to any open queries.
    if (added_uris.Count > 0 || removed_uris.Count > 0) {
        ChangeData change_data;
        change_data = new ChangeData ();
        change_data.AddedUris = added_uris;
        change_data.RemovedUris = removed_uris;

        QueryDriver.QueryableChanged (this, change_data);
    }
}
// Processes one added-receipt: runs PostAddHook, then works
// out whether the indexable is a parent with registered
// children (nothing more to do), a child of a registered
// parent (count it off), or neither (it is complete, so
// PostChildrenIndexedHook fires right away).
private void HandleAddReceipt (IndexerAddedReceipt r,
                               Indexable indexable)
{
    // PostAddHook is where internal->external Uri mapping takes
    // place, so take a copy of the receipt before calling it.
    IndexerAddedReceipt pristine_receipt = new IndexerAddedReceipt (
        r.Uri,
        r.FilterName,
        r.FilterVersion);
    PostAddHook (indexable, r);

    // An entry under our own Uri means this indexable has
    // children; they are already taken care of.
    ParentIndexableInfo own_info;
    own_info = (ParentIndexableInfo) parent_indexable_table [indexable.Uri];
    if (own_info != null)
        return;

    // Is this indexable itself a child that was registered earlier?
    ParentIndexableInfo parent_info = null;
    Uri parent_uri = indexable.ParentUri;
    if (parent_uri != null)
        parent_info = (ParentIndexableInfo) parent_indexable_table [parent_uri];

    if (parent_info == null) {
        // No children, not a child
        PostChildrenIndexedHook (indexable, pristine_receipt, indexable.Timestamp);
        return;
    }

    // One child fewer to wait for.
    parent_info.NumChildLeft --;
    parent_info.LastChildIndexTime = indexable.Timestamp;
    if (Debug)
        Log.Debug ("Finished indexing child for {0} ({1} child left)", parent_info.Indexable.Uri, parent_info.NumChildLeft);
}
// Processes one removed-receipt: purges the item from the text
// cache, runs PostRemoveHook (logging, not propagating, any
// exception), and records the Uri in removed_uris.
private void HandleRemoveReceipt (IndexerRemovedReceipt r,
                                  Indexable indexable,
                                  ArrayList removed_uris)
{
    // Drop the removed item from the text cache
    TextCache.UserCache.Delete (r.Uri);

    // Call the appropriate hook
    try {
        PostRemoveHook (indexable, r);
    } catch (Exception ex) {
        Logger.Log.Warn (ex, "Caught exception in PostRemoveHook '{0}'",
                         r.Uri);
    }

    // The Uri is read only now, after the hook has run, so that
    // the change data gets the external Uri when we are
    // remapping.
    Uri reported_uri = r.Uri;
    removed_uris.Add (reported_uri);
}
// Processes one child-indexables receipt: every child the
// backend accepts through PreChildAddHook gets its own add
// task; rejected children are cleaned up.
//
// r:                   the receipt listing the children.
// child_receipt_count: per-receipt tally; the slot at
//                      receipt_index is incremented once for
//                      each child that was scheduled.
// receipt_index:       position of r in the receipt array.
//
// A receipt without a Children collection is now ignored
// (Flush already treats a null collection as possible);
// previously it raised a NullReferenceException here.
private void HandleChildIndexableReceipt (IndexerChildIndexablesReceipt r,
                                          int[] child_receipt_count,
                                          int receipt_index)
{
    if (r.Children == null)
        return;

    foreach (Indexable child in r.Children) {
        bool please_add_a_new_task = false;

        try {
            please_add_a_new_task = PreChildAddHook (child);
        } catch (InvalidOperationException) {
            // Queryable does not support adding children
        } catch (Exception ex) {
            Logger.Log.Warn (ex, "Caught exception in PreChildAddHook '{0}'", child.DisplayUri);
        }

        if (! please_add_a_new_task) {
            child.Cleanup ();
            continue;
        }

        if (Debug)
            Log.Debug ("Adding child {0} to parent {1}", child.Uri, child.ParentUri);

        Scheduler.Task task = NewAddTask (child);
        task.SubPriority = 1;
        ThisScheduler.Add (task);

        // value at 'receipt_index' = number of successful children
        child_receipt_count [receipt_index] ++;
    }
}
1128 //////////////////////////////////////////////////////////////////////////////////
1131 // It is often convenient to have easy access to a FileAttributeStore
// Chooses the backing attribute store: extended attributes
// when ExtendedAttribute.Supported says they are available,
// otherwise the Sqlite-based store in the index directory.
virtual protected IFileAttributesStore BuildFileAttributesStore ()
{
    if (! ExtendedAttribute.Supported)
        return new FileAttributesStore_Sqlite (IndexDirectory, IndexFingerprint);

    return new FileAttributesStore_ExtendedAttribute (IndexFingerprint);
}

// Lazily created on first access and cached in fa_store.
public FileAttributesStore FileAttributesStore {
    get {
        if (fa_store != null)
            return fa_store;

        fa_store = new FileAttributesStore (BuildFileAttributesStore ());
        return fa_store;
    }
}
1151 //////////////////////////////////////////////////////////////////////////////////
// Factory for the querying driver, called from the constructor.
// Subclasses may override it to supply a different driver.
virtual protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
                                                                  int minor_version,
                                                                  bool read_only_mode)
{
    LuceneQueryingDriver new_driver;
    new_driver = new LuceneQueryingDriver (index_name, minor_version, read_only_mode);
    return new_driver;
}