//
// LuceneCommon.cs
//
// Copyright (C) 2004-2005 Novell, Inc.
//

//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;
using System.Xml;
using System.Xml.Serialization;

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using LNS = Lucene.Net.Search;

using Beagle.Util;

namespace Beagle.Daemon {

	public class LuceneCommon {

		public delegate bool HitFilter (Hit hit);

		// VERSION HISTORY
		// ---------------
		//
		//  1: Original
		//  2: Changed format of timestamp strings
		//  3: Schema changed to be more Dashboard-Match-like
		//  4: Schema changed for files to include _Directory property
		//  5: Changed analyzer to support stemming.  Bumped version # to
		//     force everyone to re-index.
		//  6: lots of schema changes as part of the general refactoring
		//  7: incremented to force a re-index after our upgrade to lucene 1.4
		//     (in theory the file formats are compatible, but we are seeing
		//     'term out of order' exceptions in some cases)
		//  8: another forced re-index, this time because of massive changes
		//     in the file system backend (it would be nice to have per-backend
		//     versioning so that we didn't have to purge all indexes just
		//     because one changed)
		//  9: changed the way properties are stored, changed in conjunction
		//     with sane handling of multiple properties on hits.
		// 10: changed to support typed and mutable properties
		// 11: moved mime type and hit type into properties
		// 12: added year-month and year-month-day resolutions for all
		//     date properties
		// 13: moved source into a property
		// 14: allow wildcard queries to also match keywords
		private const int MAJOR_VERSION = 14;
		private int minor_version = 0;

		private string index_name;
		private string top_dir;

		private string fingerprint;
		private int last_item_count = -1;

		// This is the big index, containing document full-texts and
		// data that is expensive to index.
		private Lucene.Net.Store.Directory primary_store = null;

		// This is the small index, containing document info that we
		// expect to change.  Canonical example: file names.
		private Lucene.Net.Store.Directory secondary_store = null;

		//////////////////////////////////////////////////////////////////////////////

		protected LuceneCommon (string index_name, int minor_version)
		{
			this.index_name = index_name;
			this.minor_version = minor_version;

			this.top_dir = (Path.IsPathRooted (index_name)) ? index_name : Path.Combine (PathFinder.IndexDir, index_name);
		}

		//////////////////////////////////////////////////////////////////////////////

		protected string IndexName { get { return index_name; } }

		public Lucene.Net.Store.Directory PrimaryStore { get { return primary_store; } }

		public Lucene.Net.Store.Directory SecondaryStore { get { return secondary_store; } }

		public string Fingerprint { get { return fingerprint; } }

		public string TopDirectory { get { return top_dir; } }

		//////////////////////////////////////////////////////////////////////////////

		protected TextCache text_cache = null;

		public TextCache TextCache {
			get { return text_cache; }
			set { text_cache = value; }
		}

		//////////////////////////////////////////////////////////////////////////////

		private string VersionFile {
			get { return Path.Combine (top_dir, "version"); }
		}

		private string FingerprintFile {
			get { return Path.Combine (top_dir, "fingerprint"); }
		}

		// Shouldn't really be public
		public string PrimaryIndexDirectory {
			get { return Path.Combine (top_dir, "PrimaryIndex"); }
		}

		// Shouldn't really be public
		public string SecondaryIndexDirectory {
			get { return Path.Combine (top_dir, "SecondaryIndex"); }
		}

		public string LockDirectory {
			get { return Path.Combine (top_dir, "Locks"); }
		}
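
		// For illustration only (not part of the original source): given the
		// path properties above, an index whose name is not an absolute path
		// ends up laid out underneath PathFinder.IndexDir roughly like this
		// ("SomeIndex" is just a hypothetical index name):
		//
		//     <PathFinder.IndexDir>/SomeIndex/version
		//     <PathFinder.IndexDir>/SomeIndex/fingerprint
		//     <PathFinder.IndexDir>/SomeIndex/PrimaryIndex/
		//     <PathFinder.IndexDir>/SomeIndex/SecondaryIndex/
		//     <PathFinder.IndexDir>/SomeIndex/Locks/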

		//////////////////////////////////////////////////////////////////////////////

		// Deal with dangling locks

		private bool IsDanglingLock (FileInfo info)
		{
			Log.Debug ("Checking for dangling locks...");

			// It isn't even a lock file
			if (! info.Name.EndsWith (".lock"))
				return false;

			StreamReader reader;
			string pid = null;

			try {
				reader = new StreamReader (info.FullName);
				pid = reader.ReadLine ();
				reader.Close ();

			} catch {
				// We couldn't read the lockfile, so it probably went away.
				return false;
			}

			if (pid == null) {
				// Looks like the lock file was empty, which really
				// shouldn't happen.  It should contain the PID of
				// the process which locked it.  Let's be on the safe
				// side and assume it's a dangling lock.
				Log.Warn ("Found an empty lock file, that shouldn't happen: {0}", info.FullName);
				return true;
			}

			string cmdline_file;
			cmdline_file = String.Format ("/proc/{0}/cmdline", pid);

			string cmdline = "";
			try {
				reader = new StreamReader (cmdline_file);
				cmdline = reader.ReadLine ();
				reader.Close ();
			} catch {
				// If we can't open that file, either:
				// (1) The process doesn't exist
				// (2) It does exist, but it doesn't belong to us.
				//     Thus it isn't an IndexHelper
				// In either case, the lock is dangling --- if it
				// still exists.
				return info.Exists;
			}

			// The process exists, but isn't an IndexHelper.
			// If the lock file is still there, it is dangling.
			// FIXME: During one run of bludgeon I got a null reference
			// exception here, so I added the cmdline == null check.
			// Why exactly would that happen?  Is this logic correct
			// in that (odd and presumably rare) case?
			if (cmdline == null || cmdline.IndexOf ("IndexHelper.exe") == -1)
				return info.Exists;

			// If we reach this point, we know:
			// (1) The process still exists
			// (2) We own it
			// (3) It is an IndexHelper process
			// Thus it almost certainly isn't a dangling lock.
			// The process might be wedged, but that is
			// another issue...
			return false;
		}

		protected bool Exists ()
		{
			if (! (Directory.Exists (top_dir)
			       && File.Exists (VersionFile)
			       && File.Exists (FingerprintFile)
			       && Directory.Exists (PrimaryIndexDirectory)
			       && IndexReader.IndexExists (PrimaryIndexDirectory)
			       && Directory.Exists (SecondaryIndexDirectory)
			       && IndexReader.IndexExists (SecondaryIndexDirectory)
			       && Directory.Exists (LockDirectory)))
				return false;

			// Check the index's version number.  If it is wrong,
			// declare the index non-existent.

			StreamReader version_reader;
			string version_str;
			version_reader = new StreamReader (VersionFile);
			version_str = version_reader.ReadLine ();
			version_reader.Close ();

			int current_major_version, current_minor_version;
			int i = version_str.IndexOf ('.');

			if (i != -1) {
				current_major_version = Convert.ToInt32 (version_str.Substring (0, i));
				current_minor_version = Convert.ToInt32 (version_str.Substring (i+1));
			} else {
				current_minor_version = Convert.ToInt32 (version_str);
				current_major_version = 0;
			}

			if (current_major_version != MAJOR_VERSION
			    || (minor_version >= 0 && current_minor_version != minor_version)) {
				Logger.Log.Debug ("Version mismatch in {0}", index_name);
				Logger.Log.Debug ("Index has version {0}.{1}, expected {2}.{3}",
						  current_major_version, current_minor_version,
						  MAJOR_VERSION, minor_version);
				return false;
			}

			// Check the lock directory: If there is a dangling write lock,
			// assume that the index is corrupted and declare it non-existent.
			DirectoryInfo lock_dir_info;
			lock_dir_info = new DirectoryInfo (LockDirectory);
			foreach (FileInfo info in lock_dir_info.GetFiles ()) {
				if (IsDanglingLock (info)) {
					Logger.Log.Warn ("Found a dangling index lock on {0}", info.FullName);
					return false;
				}
			}

			return true;
		}

		private Lucene.Net.Store.Directory CreateIndex (string path)
		{
			// Create a directory to put the index in.
			Directory.CreateDirectory (path);

			// Create a new store.
			Lucene.Net.Store.Directory store;
			store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);

			// Create an empty index in that store.
			IndexWriter writer;
			writer = new IndexWriter (store, null, true);
			writer.Close ();

			return store;
		}

		// Create will kill your index dead.  Use it with care.
		// You don't need to call Open after calling Create.
		protected void Create ()
		{
			if (minor_version < 0)
				minor_version = 0;

			// Purge any existing directories.
			if (Directory.Exists (top_dir)) {
				Logger.Log.Debug ("Purging {0}", top_dir);
				Directory.Delete (top_dir, true);
			}

			// Create any necessary directories.
			Directory.CreateDirectory (top_dir);
			Directory.CreateDirectory (LockDirectory);

			// Create the indexes.
			primary_store = CreateIndex (PrimaryIndexDirectory);
			secondary_store = CreateIndex (SecondaryIndexDirectory);

			// Generate and store the index fingerprint.
			fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
			TextWriter writer;
			writer = new StreamWriter (FingerprintFile, false);
			writer.WriteLine (fingerprint);
			writer.Close ();

			// Store our index version information.
			writer = new StreamWriter (VersionFile, false);
			writer.WriteLine ("{0}.{1}", MAJOR_VERSION, minor_version);
			writer.Close ();
		}

		protected void Open ()
		{
			Open (false);
		}

		protected void Open (bool read_only_mode)
		{
			// Read our index fingerprint.
			TextReader reader;
			reader = new StreamReader (FingerprintFile);
			fingerprint = reader.ReadLine ();
			reader.Close ();

			// Create stores for our indexes.
			primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
			secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
		}
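
		// Illustrative sketch (not in the original source): a subclass would
		// presumably check Exists () and then either attach to the existing
		// index or wipe it and start over, e.g.:
		//
		//     if (Exists ())
		//         Open ();     // reads the fingerprint and attaches the stores
		//     else
		//         Create ();   // purges top_dir, writes version + fingerprint
		//
		// The policy of when to force a re-index lives in the drivers built on
		// top of this class, not here.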

		////////////////////////////////////////////////////////////////

		// Custom Analyzers

		private class SingletonTokenStream : TokenStream {

			private string singleton_str;

			public SingletonTokenStream (string singleton_str)
			{
				this.singleton_str = singleton_str;
			}

			override public Lucene.Net.Analysis.Token Next ()
			{
				if (singleton_str == null)
					return null;

				Lucene.Net.Analysis.Token token;
				token = new Lucene.Net.Analysis.Token (singleton_str, 0, singleton_str.Length);

				singleton_str = null;

				return token;
			}
		}

		// FIXME: This assumes everything being indexed is in English!
		private class BeagleAnalyzer : StandardAnalyzer {

			private char [] buffer = new char [2];
			private bool strip_extra_property_info = false;

			public BeagleAnalyzer (bool strip_extra_property_info)
			{
				this.strip_extra_property_info = strip_extra_property_info;
			}

			public override TokenStream TokenStream (string fieldName, TextReader reader)
			{
				bool is_text_prop = false;

				// Strip off the first two characters in a property.
				// We store type information in those two characters, so we don't
				// want to index them.
				if (fieldName.StartsWith ("prop:")) {

					if (strip_extra_property_info) {
						// Skip everything up to and including the first :
						int c;
						do {
							c = reader.Read ();
						} while (c != -1 && c != ':');
					}

					is_text_prop = fieldName.StartsWith ("prop:t");

					// If this is a non-text property, just return one token
					// containing the entire string.  We do this to avoid
					// tokenizing keywords.
					if (! is_text_prop)
						return new SingletonTokenStream (reader.ReadToEnd ());
				}

				TokenStream outstream;
				outstream = base.TokenStream (fieldName, reader);

				if (fieldName == "Text"
				    || fieldName == "HotText"
				    || fieldName == "PropertyText"
				    || is_text_prop) {
					outstream = new NoiseFilter (outstream);
					outstream = new PorterStemFilter (outstream);
				}

				return outstream;
			}
		}

		static private Analyzer indexing_analyzer = new BeagleAnalyzer (true);
		static private Analyzer query_analyzer = new BeagleAnalyzer (false);

		static protected Analyzer IndexingAnalyzer { get { return indexing_analyzer; } }
		static protected Analyzer QueryAnalyzer { get { return query_analyzer; } }
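
		// For illustration only (not part of the original source): the two
		// analyzers differ only in whether the leading type/search code on a
		// stored property value is skipped.  Roughly, analyzing "Running dogs"
		// in the "Text" field yields the lowercased, stemmed, noise-filtered
		// tokens "run" and "dog", while a keyword property field such as
		// "prop:k:..." comes back as one untokenized token via
		// SingletonTokenStream.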

		////////////////////////////////////////////////////////////////

		// Dealing with properties

		static private char TypeToCode (PropertyType type)
		{
			switch (type) {
			case PropertyType.Text:    return 't';
			case PropertyType.Keyword: return 'k';
			case PropertyType.Date:    return 'd';
			}

			throw new Exception ("Bad property type: " + type);
		}

		static private PropertyType CodeToType (char c)
		{
			switch (c) {
			case 't': return PropertyType.Text;
			case 'k': return PropertyType.Keyword;
			case 'd': return PropertyType.Date;
			}

			throw new Exception ("Bad property code: " + c);
		}

		static private string TypeToWildcardField (PropertyType type)
		{
			switch (type) {
			case PropertyType.Text:    return "PropertyText";
			case PropertyType.Keyword: return "PropertyKeyword";
			case PropertyType.Date:    return "PropertyDate";
			}

			return null;
		}

		// Exposing this is a little bit suspicious.
		static protected string PropertyToFieldName (PropertyType type, string key)
		{
			return String.Format ("prop:{0}:{1}", TypeToCode (type), key);
		}
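
		// For example (illustrative only, "dc:title" and "fixme:host" are
		// just sample keys):
		//
		//     PropertyToFieldName (PropertyType.Text, "dc:title")      // => "prop:t:dc:title"
		//     PropertyToFieldName (PropertyType.Keyword, "fixme:host") // => "prop:k:fixme:host"
		//
		// This is the layout that CodeToType () and the Substring (7) call in
		// GetPropertyFromDocument () below rely on.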

		static private void AddDateFields (string field_name, Property prop, Document doc)
		{
			DateTime dt = StringFu.StringToDateTime (prop.Value);

			Field f;
			f = new Field ("YM:" + field_name,
				       StringFu.DateTimeToYearMonthString (dt),
				       false,  // never store
				       true,   // always index
				       false); // never tokenize
			doc.Add (f);

			f = new Field ("D:" + field_name,
				       StringFu.DateTimeToDayString (dt),
				       false,  // never store
				       true,   // always index
				       false); // never tokenize
			doc.Add (f);
		}

		static protected void AddPropertyToDocument (Property prop, Document doc)
		{
			if (prop == null || prop.Value == null)
				return;

			// Don't actually put properties in the UnindexedNamespace
			// in the document.  A horrible (and yet lovely!) hack.
			if (prop.Key.StartsWith (StringFu.UnindexedNamespace))
				return;

			Field f;

			if (prop.IsSearched) {
				string wildcard_field = TypeToWildcardField (prop.Type);
				bool tokenize = (prop.Type == PropertyType.Text);
				if (wildcard_field != null) {
					f = new Field (wildcard_field,
						       prop.Value,
						       false, // never stored
						       true,  // always indexed
						       tokenize);
					doc.Add (f);

					if (prop.Type == PropertyType.Date)
						AddDateFields (wildcard_field, prop, doc);
				}
			}

			string coded_value;
			coded_value = String.Format ("{0}:{1}",
						     prop.IsSearched ? 's' : '_',
						     prop.Value);

			string field_name = PropertyToFieldName (prop.Type, prop.Key);

			f = new Field (field_name,
				       coded_value,
				       prop.IsStored,
				       true,  // always index
				       true); // always tokenize (just strips off type code for keywords)
			doc.Add (f);

			if (prop.Type == PropertyType.Date)
				AddDateFields (field_name, prop, doc);
		}

		static protected Property GetPropertyFromDocument (Field f, Document doc, bool from_primary_index)
		{
			// Note: we don't use the document that we pass in,
			// but in theory we could.  At some later point we
			// might need to split a property's data across two or
			// more fields in the document.

			if (f == null)
				return null;

			string field_name;
			field_name = f.Name ();
			if (field_name.Length < 7
			    || ! field_name.StartsWith ("prop:"))
				return null;

			string field_value;
			field_value = f.StringValue ();

			Property prop;
			prop = new Property ();
			prop.Type = CodeToType (field_name [5]);
			prop.Key = field_name.Substring (7);
			prop.Value = field_value.Substring (2);
			prop.IsSearched = (field_value [0] == 's');
			prop.IsMutable = ! from_primary_index;
			prop.IsStored = f.IsStored ();

			return prop;
		}

		//////////////////////////////////////////////////////////////////////////////

		// Dealing with documents

		static protected void BuildDocuments (Indexable indexable,
						      out Document primary_doc,
						      out Document secondary_doc)
		{
			primary_doc = new Document ();
			secondary_doc = null;

			Field f;

			f = Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri));
			primary_doc.Add (f);

			if (indexable.ParentUri != null) {
				f = Field.Keyword ("ParentUri", UriFu.UriToSerializableString (indexable.ParentUri));
				primary_doc.Add (f);
			}

			if (indexable.ValidTimestamp) {
				// Note that we also want to search in the
				// Timestamp field when we do a wildcard date
				// query, so that's why we also add a wildcard
				// field for each item here.

				string wildcard_field = TypeToWildcardField (PropertyType.Date);

				string str = StringFu.DateTimeToString (indexable.Timestamp);
				f = Field.Keyword ("Timestamp", str);
				primary_doc.Add (f);
				f = Field.UnStored (wildcard_field, str);
				primary_doc.Add (f);

				str = StringFu.DateTimeToYearMonthString (indexable.Timestamp);
				f = Field.Keyword ("YM:Timestamp", str);
				primary_doc.Add (f);
				f = Field.UnStored ("YM:" + wildcard_field, str);
				primary_doc.Add (f);

				str = StringFu.DateTimeToDayString (indexable.Timestamp);
				f = Field.Keyword ("D:Timestamp", str);
				primary_doc.Add (f);
				f = Field.UnStored ("D:" + wildcard_field, str);
				primary_doc.Add (f);
			}

			if (indexable.NoContent) {
				// If there is no content, make a note of that
				// in a special property.
				Property prop;
				prop = Property.NewBool ("beagle:NoContent", true);
				AddPropertyToDocument (prop, primary_doc);

			} else {

				// Since we might have content, add our text
				// readers.

				TextReader reader;

				reader = indexable.GetTextReader ();
				if (reader != null) {
					f = Field.Text ("Text", reader);
					primary_doc.Add (f);
				}

				reader = indexable.GetHotTextReader ();
				if (reader != null) {
					f = Field.Text ("HotText", reader);
					primary_doc.Add (f);
				}
			}

			// Store the Type and MimeType in special properties

			if (indexable.HitType != null) {
				Property prop;
				prop = Property.NewUnsearched ("beagle:HitType", indexable.HitType);
				AddPropertyToDocument (prop, primary_doc);
			}

			if (indexable.MimeType != null) {
				Property prop;
				prop = Property.NewUnsearched ("beagle:MimeType", indexable.MimeType);
				AddPropertyToDocument (prop, primary_doc);
			}

			if (indexable.Source != null) {
				Property prop;
				prop = Property.NewUnsearched ("beagle:Source", indexable.Source);
				AddPropertyToDocument (prop, primary_doc);
			}

			// Store the other properties

			foreach (Property prop in indexable.Properties) {
				Document target_doc = primary_doc;
				if (prop.IsMutable) {
					if (secondary_doc == null) {
						secondary_doc = new Document ();
						f = Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri));
						secondary_doc.Add (f);
					}
					target_doc = secondary_doc;
				}

				AddPropertyToDocument (prop, target_doc);
			}
		}
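
		// Sketch of the resulting split (not part of the original source):
		// for a hypothetical indexable with one immutable and one mutable
		// property, BuildDocuments produces
		//
		//     primary_doc:   Uri, Timestamp fields, Text/HotText, beagle:* props,
		//                    the immutable property
		//     secondary_doc: Uri, the mutable property
		//
		// and secondary_doc stays null when there are no mutable properties.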

		static protected Document RewriteDocument (Document old_secondary_doc,
							   Indexable prop_only_indexable)
		{
			Hashtable seen_props;
			seen_props = new Hashtable ();

			Document new_doc;
			new_doc = new Document ();

			Field uri_f;
			uri_f = Field.Keyword ("Uri", UriFu.UriToSerializableString (prop_only_indexable.Uri));
			new_doc.Add (uri_f);

			Logger.Log.Debug ("Rewriting {0}", prop_only_indexable.DisplayUri);

			// Add the new properties to the new document.  To
			// delete a property, set the Value to null... then it
			// will be added to seen_props (so the old value will
			// be ignored below), but AddPropertyToDocument will
			// return w/o doing anything.
			foreach (Property prop in prop_only_indexable.Properties) {
				seen_props [prop.Key] = prop;
				AddPropertyToDocument (prop, new_doc);
				Logger.Log.Debug ("New prop '{0}' = '{1}'", prop.Key, prop.Value);
			}

			// Copy the other properties from the old document to the
			// new one, skipping any properties that we got new values
			// for out of the Indexable.
			if (old_secondary_doc != null) {
				foreach (Field f in old_secondary_doc.Fields ()) {
					Property prop;
					prop = GetPropertyFromDocument (f, old_secondary_doc, false);
					if (prop != null && ! seen_props.Contains (prop.Key)) {
						Logger.Log.Debug ("Old prop '{0}' = '{1}'", prop.Key, prop.Value);
						AddPropertyToDocument (prop, new_doc);
					}
				}
			}

			return new_doc;
		}

		static protected Uri GetUriFromDocument (Document doc)
		{
			string uri;
			uri = doc.Get ("Uri");
			if (uri == null)
				throw new Exception ("Got document from Lucene w/o a URI!");
			return UriFu.UriStringToUri (uri);
		}

		static protected Hit DocumentToHit (Document doc)
		{
			Hit hit;
			hit = new Hit ();

			hit.Uri = GetUriFromDocument (doc);

			string str;
			str = doc.Get ("ParentUri");
			if (str != null)
				hit.ParentUri = UriFu.UriStringToUri (str);

			hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

			AddPropertiesToHit (hit, doc, true);

			// Get the Type and MimeType from the properties.
			hit.Type = hit.GetFirstProperty ("beagle:HitType");
			hit.MimeType = hit.GetFirstProperty ("beagle:MimeType");
			hit.Source = hit.GetFirstProperty ("beagle:Source");

			return hit;
		}

		static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
		{
			foreach (Field f in doc.Fields ()) {
				Property prop;
				prop = GetPropertyFromDocument (f, doc, from_primary_index);
				if (prop != null)
					hit.AddProperty (prop);
			}
		}

		//////////////////////////////////////////////////////////////////////////////

		// Handle the index's item count

		public int GetItemCount ()
		{
			if (last_item_count < 0) {
				IndexReader reader;
				reader = GetReader (PrimaryStore);
				last_item_count = reader.NumDocs ();
				ReleaseReader (reader);
			}
			return last_item_count;
		}

		// We should set the cached count of index items when IndexReaders
		// are open and available, so calls to GetItemCount will return immediately.

		protected bool HaveItemCount { get { return last_item_count >= 0; } }

		protected void SetItemCount (IndexReader reader)
		{
			last_item_count = reader.NumDocs ();
		}

		public void SetItemCount (int count)
		{
			last_item_count = count;
		}

		protected void AdjustItemCount (int delta)
		{
			if (last_item_count >= 0)
				last_item_count += delta;
		}

		//////////////////////////////////////////////////////////////////////////////

		// Access to the stemmer and list of stop words

		static PorterStemmer stemmer = new PorterStemmer ();

		static public string Stem (string str)
		{
			return stemmer.Stem (str);
		}

		public static bool IsStopWord (string stemmed_word)
		{
			return ArrayFu.IndexOfString (StopAnalyzer.ENGLISH_STOP_WORDS, stemmed_word) != -1;
		}
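
		// For example (illustrative only):
		//
		//     Stem ("running")   // => "run" (Porter stemming)
		//     IsStopWord ("the") // => true, "the" is in ENGLISH_STOP_WORDS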

		//////////////////////////////////////////////////////////////////////////////

		// Special Hit Filtering classes

		static private bool TrueHitFilter (Hit hit)
		{
			return true;
		}

		static private HitFilter true_hit_filter = new HitFilter (TrueHitFilter);

		public class OrHitFilter {

			private ArrayList all = new ArrayList ();
			private bool contains_known_true = false;

			public void Add (HitFilter hit_filter)
			{
				if (hit_filter == true_hit_filter)
					contains_known_true = true;
				all.Add (hit_filter);
			}

			public bool HitFilter (Hit hit)
			{
				if (contains_known_true)
					return true;
				foreach (HitFilter hit_filter in all)
					if (hit_filter (hit))
						return true;
				return false;
			}
		}

		public class AndHitFilter {

			private ArrayList all = new ArrayList ();

			public void Add (HitFilter hit_filter)
			{
				all.Add (hit_filter);
			}

			public bool HitFilter (Hit hit)
			{
				foreach (HitFilter hit_filter in all)
					if (! hit_filter (hit))
						return false;
				return true;
			}
		}

		public class NotHitFilter {
			HitFilter original;

			public NotHitFilter (HitFilter original)
			{
				this.original = original;
			}

			public bool HitFilter (Hit hit)
			{
				return ! original (hit);
			}
		}
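
		// Illustrative composition (not in the original source): the classes
		// above just wrap lists of HitFilter delegates, so a hypothetical
		// caller could combine them like
		//
		//     AndHitFilter and_filter = new AndHitFilter ();
		//     and_filter.Add (date_filter);                      // e.g. a DateRangeHitFilter delegate
		//     and_filter.Add (new HitFilter (SomeOtherFilter));  // hypothetical predicate
		//     HitFilter combined = new HitFilter (and_filter.HitFilter);
		//
		// which is essentially what QueryPartToQuery does with OrHitFilter below.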

		//////////////////////////////////////////////////////////////////////////////

		// Queries

		static private LNS.Query StringToQuery (string field_name,
							string text,
							ArrayList term_list)
		{
			ArrayList tokens = new ArrayList ();

			// Use the analyzer to extract the query's tokens.
			// This code is taken from Lucene's query parser.
			TokenStream source = QueryAnalyzer.TokenStream (field_name, new StringReader (text));
			while (true) {
				Lucene.Net.Analysis.Token token;
				try {
					token = source.Next ();
					if (token == null)
						break;
				} catch (IOException) {
					break;
				}
				if (token != null)
					tokens.Add (token.TermText ());
			}
			try {
				source.Close ();
			} catch (IOException) {
				// ignore
			}

			if (tokens.Count == 0)
				return null;

			LNS.PhraseQuery query = new LNS.PhraseQuery ();

			foreach (string token in tokens) {
				Term term;
				term = new Term (field_name, token);
				query.Add (term);
				if (term_list != null)
					term_list.Add (term);
			}

			return query;
		}
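
		// For example (illustrative only), StringToQuery ("Text", "web browsing", null)
		// runs the text through QueryAnalyzer and yields a PhraseQuery over the
		// stemmed terms Text:"web" and Text:"brows", mirroring how the same
		// words were stemmed at indexing time.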

		// Date Range Handling

		// This function will break down dates to discrete chunks of
		// time to avoid expanding RangeQuerys as much as possible.
		// For example, searching for
		//
		// YMD(5 May 2005, 16 Oct 2006)
		//
		// would break down into three queries:
		//
		// (YM(May 2005) AND D(5,31)) OR
		// YM(Jun 2005, Sep 2006) OR
		// (YM(Oct 2006) AND D(1,16))

		static private DateTime lower_bound = new DateTime (1970, 1, 1);

		// FIXME: we should probably boost this sometime around 2030.
		// Mark your calendar.
		static private DateTime upper_bound = new DateTime (2038, 12, 31);

		static private Term NewYearMonthTerm (string field_name, int y, int m)
		{
			return new Term ("YM:" + field_name, String.Format ("{0}{1:00}", y, m));
		}

		static private LNS.Query NewYearMonthQuery (string field_name, int y, int m)
		{
			return new LNS.TermQuery (NewYearMonthTerm (field_name, y, m));
		}

		static private LNS.Query NewYearMonthQuery (string field_name, int y1, int m1, int y2, int m2)
		{
			return new LNS.RangeQuery (NewYearMonthTerm (field_name, y1, m1),
						   NewYearMonthTerm (field_name, y2, m2),
						   true); // query is inclusive
		}

		static private Term NewDayTerm (string field_name, int d)
		{
			return new Term ("D:" + field_name, String.Format ("{0:00}", d));
		}

		static private LNS.Query NewDayQuery (string field_name, int d1, int d2)
		{
			return new LNS.RangeQuery (NewDayTerm (field_name, d1),
						   NewDayTerm (field_name, d2),
						   true); // query is inclusive
		}
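
		// Term format, for reference (illustrative only):
		//
		//     NewYearMonthTerm ("PropertyDate", 2005, 5) // => Term ("YM:PropertyDate", "200505")
		//     NewDayTerm ("PropertyDate", 7)             // => Term ("D:PropertyDate", "07")
		//
		// These match the "YM:" and "D:" fields written by AddDateFields () above.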

		private class DateRangeHitFilter {
			public string Key;
			public DateTime StartDate;
			public DateTime EndDate;

			public bool HitFilter (Hit hit)
			{
				// First, check the Timestamp
				if (Key == QueryPart_DateRange.AllPropertiesKey
				    || Key == QueryPart_DateRange.TimestampKey) {
					DateTime dt;
					dt = hit.Timestamp;
					if (StartDate <= dt && dt <= EndDate)
						return true;
					if (Key == QueryPart_DateRange.TimestampKey)
						return false;
				}

				if (Key == QueryPart_DateRange.AllPropertiesKey) {
					// Walk through all of the properties, and see if any
					// date properties fall inside the range.
					foreach (Property prop in hit.Properties) {
						if (prop.Type == PropertyType.Date) {
							DateTime dt;
							dt = StringFu.StringToDateTime (prop.Value);
							if (StartDate <= dt && dt <= EndDate)
								return true;
						}
					}
					return false;
				} else {
					// Walk through all of the properties with the given key,
					// and see if any of them fall inside of the range.
					string [] values;
					values = hit.GetProperties (Key);
					foreach (string v in values) {
						DateTime dt;
						dt = StringFu.StringToDateTime (v);
						if (StartDate <= dt && dt <= EndDate)
							return true;
					}
					return false;
				}
			}
		}

		static private LNS.Query GetDateRangeQuery (QueryPart_DateRange part, out HitFilter hit_filter)
		{
			string field_name;
			if (part.Key == QueryPart_DateRange.AllPropertiesKey)
				field_name = TypeToWildcardField (PropertyType.Date);
			else if (part.Key == QueryPart_DateRange.TimestampKey)
				field_name = "Timestamp";
			else
				field_name = PropertyToFieldName (PropertyType.Date, part.Key);

			// FIXME: We could optimize this and reduce the size of our range
			// queries if we actually knew the min and max dates that appear in
			// any properties in the index.  We would need to inspect the index to
			// determine that at start-up, and then track it as new documents
			// get added to the index.
			if (part.StartDate < lower_bound)
				part.StartDate = lower_bound;
			if (part.EndDate > upper_bound || part.EndDate == DateTime.MinValue)
				part.EndDate = upper_bound;

			// Swap the start and end dates if they come in reversed.
			if (part.StartDate > part.EndDate) {
				DateTime swap;
				swap = part.StartDate;
				part.StartDate = part.EndDate;
				part.EndDate = swap;
			}

			// Set up our hit filter to cull out the bad dates.
			DateRangeHitFilter drhf;
			drhf = new DateRangeHitFilter ();
			drhf.Key = part.Key;
			drhf.StartDate = part.StartDate;
			drhf.EndDate = part.EndDate;
			hit_filter = new HitFilter (drhf.HitFilter);

			Logger.Log.Debug ("Building new date range query");
			Logger.Log.Debug ("Start: {0}", part.StartDate);
			Logger.Log.Debug ("End: {0}", part.EndDate);

			int y1, m1, d1, y2, m2, d2;
			y1 = part.StartDate.Year;
			m1 = part.StartDate.Month;
			d1 = part.StartDate.Day;
			y2 = part.EndDate.Year;
			m2 = part.EndDate.Month;
			d2 = part.EndDate.Day;

			LNS.BooleanQuery top_level_query;
			top_level_query = new LNS.BooleanQuery ();

			// A special case: both the start and the end of our range fall
			// in the same month.
			if (y1 == y2 && m1 == m2) {
				LNS.Query ym_query;
				ym_query = NewYearMonthQuery (field_name, y1, m1);

				// If our range only covers a part of the month, do a range query on the days.
				if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
					LNS.BooleanQuery sub_query;
					sub_query = new LNS.BooleanQuery ();
					sub_query.Add (ym_query, true, false);
					sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
					top_level_query.Add (sub_query, false, false);
				} else {
					top_level_query.Add (ym_query, false, false);
				}

			} else {

				// Handle a partial month at the beginning of our range.
				if (d1 > 1) {
					LNS.BooleanQuery sub_query;
					sub_query = new LNS.BooleanQuery ();
					sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
					sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
					top_level_query.Add (sub_query, false, false);

					++m1;
					if (m1 == 13) {
						m1 = 1;
						++y1;
					}
				}

				// And likewise, handle a partial month at the end of our range.
				if (d2 < DateTime.DaysInMonth (y2, m2)) {
					LNS.BooleanQuery sub_query;
					sub_query = new LNS.BooleanQuery ();
					sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
					sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
					top_level_query.Add (sub_query, false, false);

					--m2;
					if (m2 == 0) {
						m2 = 12;
						--y2;
					}
				}

				// Generate the query for the "middle" of our period, if it is non-empty
				if (y1 < y2 || ((y1 == y2) && m1 <= m2))
					top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
							     false, false);
			}

			return top_level_query;
		}

		// search_subset_uris is a list of Uris that this search should be
		// limited to.
		static protected void QueryPartToQuery (QueryPart abstract_part,
							bool only_build_primary_query,
							ArrayList term_list,
							out LNS.Query primary_query,
							out LNS.Query secondary_query,
							out HitFilter hit_filter)
		{
			primary_query = null;
			secondary_query = null;

			// By default, we assume that our lucene queries will return exactly the
			// matching set of objects.  We need to set the hit filter if further
			// refinement of the search results is required.  (As in the case of
			// date range queries, for example.)  We essentially have to do this
			// to make OR queries work correctly.
			hit_filter = true_hit_filter;

			if (abstract_part == null)
				return;

			// The exception is when dealing with a prohibited part.  Just return
			// null for the hit filter in that case.  This works since
			// prohibited parts are not allowed inside of OR queries.
			if (abstract_part.Logic == QueryPartLogic.Prohibited)
				hit_filter = null;

			if (abstract_part is QueryPart_Text) {
				QueryPart_Text part = (QueryPart_Text) abstract_part;

				if (! (part.SearchFullText || part.SearchTextProperties))
					return;

				LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
				LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

				if (part.SearchFullText) {
					LNS.Query subquery;
					subquery = StringToQuery ("Text", part.Text, term_list);
					if (subquery != null)
						p_query.Add (subquery, false, false);

					// FIXME: HotText is ignored for now!
					// subquery = StringToQuery ("HotText", part.Text);
					// if (subquery != null)
					//	p_query.Add (subquery, false, false);
				}

				if (part.SearchTextProperties) {
					LNS.Query subquery;
					subquery = StringToQuery ("PropertyText", part.Text, term_list);
					if (subquery != null) {
						p_query.Add (subquery, false, false);
						// Properties can live in either index
						if (! only_build_primary_query)
							s_query.Add (subquery.Clone () as LNS.Query, false, false);
					}

					Term term;
					term = new Term ("PropertyKeyword", part.Text);
					// FIXME: terms are already added in term_list, but they may have been
					// tokenized.  The term here is the non-tokenized version.  Should this
					// be added to term_list?  term_list is used to calculate scores.
					if (term_list != null)
						term_list.Add (term);
					subquery = new LNS.TermQuery (term);
					p_query.Add (subquery, false, false);
					// Properties can live in either index
					if (! only_build_primary_query)
						s_query.Add (subquery.Clone () as LNS.Query, false, false);
				}

				primary_query = p_query;
				if (! only_build_primary_query)
					secondary_query = s_query;

				return;
			}

			if (abstract_part is QueryPart_Property) {
				QueryPart_Property part = (QueryPart_Property) abstract_part;

				string field_name;
				if (part.Key == QueryPart_Property.AllProperties) {
					field_name = TypeToWildcardField (part.Type);
					// FIXME: probably shouldn't just return silently
					if (field_name == null)
						return;
				} else
					field_name = PropertyToFieldName (part.Type, part.Key);

				if (part.Type == PropertyType.Text)
					primary_query = StringToQuery (field_name, part.Value, term_list);
				else {
					Term term;
					term = new Term (field_name, part.Value);
					if (term_list != null)
						term_list.Add (term);
					primary_query = new LNS.TermQuery (term);
				}

				// Properties can live in either index
				if (! only_build_primary_query && primary_query != null)
					secondary_query = primary_query.Clone () as LNS.Query;

				return;
			}

			if (abstract_part is QueryPart_DateRange) {

				QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;

				primary_query = GetDateRangeQuery (part, out hit_filter);
				// Date properties can live in either index
				if (! only_build_primary_query && primary_query != null)
					secondary_query = primary_query.Clone () as LNS.Query;

				// If this is a prohibited part, invert our hit filter.
				if (part.Logic == QueryPartLogic.Prohibited) {
					NotHitFilter nhf;
					nhf = new NotHitFilter (hit_filter);
					hit_filter = new HitFilter (nhf.HitFilter);
				}

				return;
			}

			if (abstract_part is QueryPart_Or) {
				QueryPart_Or part = (QueryPart_Or) abstract_part;

				// Assemble a new BooleanQuery combining all of the sub-parts.
				LNS.BooleanQuery p_query;
				p_query = new LNS.BooleanQuery ();

				LNS.BooleanQuery s_query = null;
				if (! only_build_primary_query)
					s_query = new LNS.BooleanQuery ();

				primary_query = p_query;
				secondary_query = s_query;

				OrHitFilter or_hit_filter = null;

				foreach (QueryPart sub_part in part.SubParts) {
					LNS.Query p_subq, s_subq;
					HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored
					// FIXME: Any subpart in an OR which has a hit filter won't work
					// correctly, because we can't tell which part of an OR we matched
					// against to filter correctly.  This affects date range queries.
					QueryPartToQuery (sub_part, only_build_primary_query,
							  term_list,
							  out p_subq, out s_subq, out sub_hit_filter);
					if (p_subq != null)
						p_query.Add (p_subq, false, false);
					if (s_subq != null)
						s_query.Add (s_subq, false, false);
					if (sub_hit_filter != null) {
						if (or_hit_filter == null)
							or_hit_filter = new OrHitFilter ();
						or_hit_filter.Add (sub_hit_filter);
					}
				}

				if (or_hit_filter != null)
					hit_filter = new HitFilter (or_hit_filter.HitFilter);

				return;
			}

			throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
		}

		static protected LNS.Query UriQuery (string field_name, Uri uri)
		{
			return new LNS.TermQuery (new Term (field_name, UriFu.UriToSerializableString (uri)));
		}

		static protected LNS.Query UriQuery (string field_name, ICollection uri_list)
		{
			return UriQuery (field_name, uri_list, null);
		}

		static protected LNS.Query UriQuery (string field_name, ICollection uri_list, LNS.Query extra_requirement)
		{
			if (uri_list.Count == 0)
				return null;

			int max_clauses;
			max_clauses = LNS.BooleanQuery.GetMaxClauseCount ();

			int N;
			N = 1 + (uri_list.Count - 1) / max_clauses;

			LNS.BooleanQuery top_query;
			top_query = new LNS.BooleanQuery ();

			int cursor = 0;
			if (extra_requirement != null) {
				top_query.Add (extra_requirement, true, false);
				++cursor;
			}

			ArrayList bottom_queries = null;

			if (N > 1) {
				bottom_queries = new ArrayList ();
				for (int i = 0; i < N; ++i) {
					LNS.BooleanQuery bq;
					bq = new LNS.BooleanQuery ();
					bottom_queries.Add (bq);
					top_query.Add (bq, false, false);
				}
			}

			foreach (Uri uri in uri_list) {
				LNS.Query subquery;
				subquery = UriQuery (field_name, uri);

				LNS.BooleanQuery target;
				if (N == 1)
					target = top_query;
				else {
					target = (LNS.BooleanQuery) bottom_queries [cursor];
					++cursor;
					if (cursor >= N)
						cursor = 0;
				}

				target.Add (subquery, false, false);
			}

			return top_query;
		}
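
		// Worked example (illustrative only): with Lucene's default maximum of
		// 1024 clauses per BooleanQuery, a uri_list of 2500 URIs gives
		// N = 1 + (2500 - 1) / 1024 = 3, so the per-URI term queries are
		// spread round-robin across three nested BooleanQuerys under top_query.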

		///////////////////////////////////////////////////////////////////////////////////

		public int SegmentCount {
			get {
				DirectoryInfo dir_info;
				int p_count = 0, s_count = 0;

				dir_info = new DirectoryInfo (PrimaryIndexDirectory);
				foreach (FileInfo file_info in dir_info.GetFiles ())
					if (file_info.Extension == ".cfs")
						++p_count;

				dir_info = new DirectoryInfo (SecondaryIndexDirectory);
				foreach (FileInfo file_info in dir_info.GetFiles ())
					if (file_info.Extension == ".cfs")
						++s_count;

				return p_count > s_count ? p_count : s_count;
			}
		}

		///////////////////////////////////////////////////////////////////////////////////

		// Cache IndexReaders on a per-Lucene index basis, since they
		// are extremely expensive to create.  Note that using this
		// only makes sense in situations where the index only
		// possibly might change from underneath us, but most of the
		// time probably won't.  This means it makes sense to do
		// this in LuceneQueryingDriver.cs, but it doesn't in
		// LuceneIndexingDriver.cs.

		private class ReaderAndVersion {

			public IndexReader Reader;
			public long Version;
			public int Refcount;

			public ReaderAndVersion (IndexReader reader, long version)
			{
				this.Reader = reader;
				this.Version = version;
				this.Refcount = 1;
			}
		}

		static private Hashtable directory_rav_map = new Hashtable ();
		static private Hashtable reader_rav_map = new Hashtable ();

		static public LNS.IndexSearcher GetSearcher (Lucene.Net.Store.Directory directory)
		{
			IndexReader reader = GetReader (directory);

			return new LNS.IndexSearcher (reader);
		}

		static public IndexReader GetReader (Lucene.Net.Store.Directory directory)
		{
			IndexReader reader;
			long version;

			lock (reader_rav_map) {
				ReaderAndVersion rav = (ReaderAndVersion) directory_rav_map [directory];

				if (rav == null) {
					version = IndexReader.GetCurrentVersion (directory);
					reader = IndexReader.Open (directory);

					rav = new ReaderAndVersion (reader, version);
					rav.Refcount++;

					directory_rav_map [directory] = rav;
					reader_rav_map [reader] = rav;

					return reader;
				}

				version = IndexReader.GetCurrentVersion (directory);

				if (version != rav.Version) {
					UnrefReaderAndVersion_Unlocked (rav);

					reader = IndexReader.Open (directory);

					rav = new ReaderAndVersion (reader, version);
					rav.Refcount++;

					directory_rav_map [directory] = rav;
					reader_rav_map [reader] = rav;
				} else
					rav.Refcount++;

				return rav.Reader;
			}
		}

		static private void UnrefReaderAndVersion_Unlocked (ReaderAndVersion rav)
		{
			rav.Refcount--;

			if (rav.Refcount == 0) {
				rav.Reader.Close ();
				reader_rav_map.Remove (rav.Reader);
			}
		}

		static public void ReleaseReader (IndexReader reader)
		{
			lock (reader_rav_map) {
				ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader];

				UnrefReaderAndVersion_Unlocked (rav);
			}
		}

		static public void ReleaseSearcher (LNS.IndexSearcher searcher)
		{
			IndexReader reader = searcher.GetIndexReader ();

			searcher.Close ();
			ReleaseReader (reader);
		}
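
		// Typical usage of the cache (a sketch, not part of the original
		// source): callers are expected to pair every Get with a Release,
		// e.g. given some LuceneCommon instance `index` and a query
		// `some_query` built elsewhere:
		//
		//     LNS.IndexSearcher searcher = GetSearcher (index.PrimaryStore);
		//     try {
		//         LNS.Hits hits = searcher.Search (some_query);
		//         // ... walk the hits ...
		//     } finally {
		//         ReleaseSearcher (searcher);
		//     }
		//
		// Skipping the Release leaks a refcount and keeps the underlying
		// IndexReader open.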

		///////////////////////////////////////////////////////////////////////////////////

		// Various ways to grab lots of hits at once.
		// These should never be used for querying, only for utility
		// functions.

		public int GetBlockOfHits (int cookie,
					   Hit [] block_of_hits)
		{
			IndexReader primary_reader;
			IndexReader secondary_reader;
			primary_reader = GetReader (PrimaryStore);
			secondary_reader = GetReader (SecondaryStore);

			int request_size;
			request_size = block_of_hits.Length;
			if (request_size > primary_reader.NumDocs ())
				request_size = primary_reader.NumDocs ();

			int max_doc;
			max_doc = primary_reader.MaxDoc ();

			if (cookie < 0) {
				Random random;
				random = new Random ();
				cookie = random.Next (max_doc);
			}

			int original_cookie;
			original_cookie = cookie;

			Hashtable primary_docs, secondary_docs;
			primary_docs = UriFu.NewHashtable ();
			secondary_docs = UriFu.NewHashtable ();

			// Load the primary documents
			for (int i = 0; i < request_size; ++i) {

				if (! primary_reader.IsDeleted (cookie)) {
					Document doc;
					doc = primary_reader.Document (cookie);
					primary_docs [GetUriFromDocument (doc)] = doc;
				}

				++cookie;
				if (cookie >= max_doc) // wrap around
					cookie = 0;

				// If we somehow end up back where we started,
				// give up.
				if (cookie == original_cookie)
					break;
			}

			// If necessary, load the secondary documents
			if (secondary_reader != null) {
				LNS.IndexSearcher searcher;
				searcher = new LNS.IndexSearcher (secondary_reader);

				LNS.Query uri_query;
				uri_query = UriQuery ("Uri", primary_docs.Keys);

				LNS.Hits hits;
				hits = searcher.Search (uri_query);
				for (int i = 0; i < hits.Length (); ++i) {
					Document doc;
					doc = hits.Doc (i);
					secondary_docs [GetUriFromDocument (doc)] = doc;
				}

				searcher.Close ();
			}

			ReleaseReader (primary_reader);
			ReleaseReader (secondary_reader);

			// Now assemble the hits
			int j = 0;
			foreach (Uri uri in primary_docs.Keys) {
				Document primary_doc, secondary_doc;
				primary_doc = primary_docs [uri] as Document;
				secondary_doc = secondary_docs [uri] as Document;

				Hit hit;
				hit = DocumentToHit (primary_doc);
				if (secondary_doc != null)
					AddPropertiesToHit (hit, secondary_doc, false);

				block_of_hits [j] = hit;
				++j;
			}

			// null-pad the array, if necessary
			for (; j < block_of_hits.Length; ++j)
				block_of_hits [j] = null;

			// Return the new cookie
			return cookie;
		}
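
		// Usage sketch (not in the original source): the cookie is just the
		// document number to resume from, so a hypothetical caller can walk
		// the index in blocks:
		//
		//     Hit [] block = new Hit [100];
		//     int cookie = -1;   // -1 => start at a random document
		//     cookie = GetBlockOfHits (cookie, block);
		//     // ... process block, then call again with the returned cookie ...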

		// For a large index, this will be very slow and will consume
		// a lot of memory.  Don't call it without a good reason!
		// We return a hashtable indexed by Uri.
		public Hashtable GetAllHitsByUri ()
		{
			Hashtable all_hits;
			all_hits = UriFu.NewHashtable ();

			IndexReader primary_reader;
			IndexReader secondary_reader;
			primary_reader = GetReader (PrimaryStore);
			secondary_reader = GetReader (SecondaryStore);

			// Load everything from the primary index
			int max_doc;
			max_doc = primary_reader.MaxDoc ();
			for (int i = 0; i < max_doc; ++i) {

				if (primary_reader.IsDeleted (i))
					continue;

				Document doc;
				doc = primary_reader.Document (i);

				Hit hit;
				hit = DocumentToHit (doc);
				all_hits [hit.Uri] = hit;
			}

			// Now add in everything from the secondary index, if it exists
			if (secondary_reader != null) {
				max_doc = secondary_reader.MaxDoc ();
				for (int i = 0; i < max_doc; ++i) {

					if (secondary_reader.IsDeleted (i))
						continue;

					Document doc;
					doc = secondary_reader.Document (i);

					Uri uri;
					uri = GetUriFromDocument (doc);

					Hit hit;
					hit = (Hit) all_hits [uri];
					if (hit != null)
						AddPropertiesToHit (hit, doc, false);
				}
			}

			ReleaseReader (primary_reader);
			ReleaseReader (secondary_reader);

			return all_hits;