beagled/LuceneCommon.cs (beagle.git)
1 //
2 // LuceneCommon.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Diagnostics;
30 using System.Globalization;
31 using System.IO;
32 using System.Text;
33 using System.Threading;
34 using System.Xml;
35 using System.Xml.Serialization;
37 using Lucene.Net.Analysis;
38 using Lucene.Net.Analysis.Standard;
39 using Lucene.Net.Documents;
40 using Lucene.Net.Index;
41 using Lucene.Net.QueryParsers;
42 using LNS = Lucene.Net.Search;
44 using Beagle.Util;
46 namespace Beagle.Daemon {
48 public class LuceneCommon {
50 public delegate bool HitFilter (Hit hit);
52 // VERSION HISTORY
53 // ---------------
55 // 1: Original
56 // 2: Changed format of timestamp strings
57 // 3: Schema changed to be more Dashboard-Match-like
58 // 4: Schema changed for files to include _Directory property
59 // 5: Changed analyzer to support stemming. Bumped version # to
60 // force everyone to re-index.
61 // 6: lots of schema changes as part of the general refactoring
62 // 7: incremented to force a re-index after our upgrade to lucene 1.4
63 //     (in theory the file formats are compatible, but we are seeing 'term
64 // out of order' exceptions in some cases)
65 // 8: another forced re-index, this time because of massive changes
66 // in the file system backend (it would be nice to have per-backend
67 // versioning so that we didn't have to purge all indexes just
68 // because one changed)
69 // 9: changed the way properties are stored, changed in conjunction
70 // with sane handling of multiple properties on hits.
71 // 10: changed to support typed and mutable properties
72 // 11: moved mime type and hit type into properties
73 // 12: added year-month and year-month-day resolutions for all
74 // date properties
75 // 13: moved source into a property
76 // 14: allow wildcard queries to also match keywords
77 private const int MAJOR_VERSION = 14;
78 private int minor_version = 0;
80 private string index_name;
81 private string top_dir;
83 private string fingerprint;
84 private int last_item_count = -1;
86 // This is the big index, containing document full-texts and
87 // data that is expensive to index.
88 private Lucene.Net.Store.Directory primary_store = null;
90 // This is the small index, containing document info that we
91 // expect to have change. Canonical example: file names.
92 private Lucene.Net.Store.Directory secondary_store = null;
94 //////////////////////////////////////////////////////////////////////////////
96 protected LuceneCommon (string index_name, int minor_version)
98 this.index_name = index_name;
99 this.minor_version = minor_version;
101 this.top_dir = (Path.IsPathRooted (index_name)) ? index_name : Path.Combine (PathFinder.IndexDir, index_name);
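// Illustrative note (not in the original source): if index_name is a relative
// name such as "FileSystemIndex", the index lives under PathFinder.IndexDir
// (typically something like ~/.beagle/Indexes/FileSystemIndex; the exact
// location is whatever PathFinder reports). An absolute index_name is used as-is.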
104 //////////////////////////////////////////////////////////////////////////////
106 protected string IndexName { get { return index_name; } }
108 public Lucene.Net.Store.Directory PrimaryStore { get { return primary_store; } }
110 public Lucene.Net.Store.Directory SecondaryStore { get { return secondary_store; } }
112 public string Fingerprint { get { return fingerprint; } }
114 public string TopDirectory { get { return top_dir; } }
116 //////////////////////////////////////////////////////////////////////////////
118 protected TextCache text_cache = null;
120 public TextCache TextCache {
121 get { return text_cache; }
122 set { text_cache = value; }
125 //////////////////////////////////////////////////////////////////////////////
127 private string VersionFile {
128 get { return Path.Combine (top_dir, "version"); }
131 private string FingerprintFile {
132 get { return Path.Combine (top_dir, "fingerprint"); }
135 // Shouldn't really be public
136 public string PrimaryIndexDirectory {
137 get { return Path.Combine (top_dir, "PrimaryIndex"); }
140 // Shouldn't really be public
141 public string SecondaryIndexDirectory {
142 get { return Path.Combine (top_dir, "SecondaryIndex"); }
145 public string LockDirectory {
146 get { return Path.Combine (top_dir, "Locks"); }
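// Illustrative sketch of the resulting on-disk layout, derived from the
// properties above:
//
//   <top_dir>/version          index version string, e.g. "14.0"
//   <top_dir>/fingerprint      randomly generated index fingerprint
//   <top_dir>/PrimaryIndex/    the big Lucene index
//   <top_dir>/SecondaryIndex/  the small, frequently-changing Lucene index
//   <top_dir>/Locks/           Lucene lock files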
149 //////////////////////////////////////////////////////////////////////////////
151 // Deal with dangling locks
153 private bool IsDanglingLock (FileInfo info)
155 // It isn't even a lock file
156 if (! info.Name.EndsWith (".lock"))
157 return false;
159 StreamReader reader;
160 string pid = null;
162 try {
163 reader = new StreamReader (info.FullName);
164 pid = reader.ReadLine ();
165 reader.Close ();
167 } catch {
168 // We couldn't read the lockfile, so it probably went away.
169 return false;
172 string cmdline_file;
173 cmdline_file = String.Format ("/proc/{0}/cmdline", pid);
175 string cmdline = "";
176 try {
177 reader = new StreamReader (cmdline_file);
178 cmdline = reader.ReadLine ();
179 reader.Close ();
180 } catch {
181 // If we can't open that file, either:
182 // (1) The process doesn't exist
183 // (2) It does exist, but it doesn't belong to us.
184 // Thus it isn't an IndexHelper
185 // In either case, the lock is dangling --- if it
186 // still exists.
187 return info.Exists;
190 // The process exists, but isn't an IndexHelper.
191 // If the lock file is still there, it is dangling.
192 // FIXME: During one run of bludgeon I got a null reference
193 // exception here, so I added the cmdline == null check.
194 // Why exactly would that happen? Is this logic correct
195 // in that (odd and presumably rare) case?
196 if (cmdline == null || cmdline.IndexOf ("IndexHelper.exe") == -1)
197 return info.Exists;
199 // If we reach this point, we know:
200 // (1) The process still exists
201 // (2) We own it
202 // (3) It is an IndexHelper process
203 // Thus it almost certainly isn't a dangling lock.
204 // The process might be wedged, but that is
205 // another issue...
206 return false;
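// Example (hypothetical values, for illustration only): a lock file whose
// first line is "1234" leads us to read /proc/1234/cmdline. If that file is
// unreadable, or is readable but does not mention IndexHelper.exe, the lock
// is considered dangling, provided the lock file itself still exists.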
210 // Return true if there are dangling locks
211 protected bool HaveDanglingLocks ()
213 return false;
216 protected bool Exists ()
218 if (! (Directory.Exists (top_dir)
219 && File.Exists (VersionFile)
220 && File.Exists (FingerprintFile)
221 && Directory.Exists (PrimaryIndexDirectory)
222 && IndexReader.IndexExists (PrimaryIndexDirectory)
223 && Directory.Exists (SecondaryIndexDirectory)
224 && IndexReader.IndexExists (SecondaryIndexDirectory)
225 && Directory.Exists (LockDirectory)))
226 return false;
228 // Check the index's version number. If it is wrong,
229 // declare the index non-existent.
231 StreamReader version_reader;
232 string version_str;
233 version_reader = new StreamReader (VersionFile);
234 version_str = version_reader.ReadLine ();
235 version_reader.Close ();
237 int current_major_version, current_minor_version;
238 int i = version_str.IndexOf ('.');
240 if (i != -1) {
241 current_major_version = Convert.ToInt32 (version_str.Substring (0, i));
242 current_minor_version = Convert.ToInt32 (version_str.Substring (i+1));
243 } else {
244 current_minor_version = Convert.ToInt32 (version_str);
245 current_major_version = 0;
248 if (current_major_version != MAJOR_VERSION
249 || (minor_version >= 0 && current_minor_version != minor_version)) {
250 Logger.Log.Debug ("Version mismatch in {0}", index_name);
251 Logger.Log.Debug ("Index has version {0}.{1}, expected {2}.{3}",
252 current_major_version, current_minor_version,
253 MAJOR_VERSION, minor_version);
254 return false;
257 // Check the lock directory: If there is a dangling write lock,
258 // assume that the index is corrupted and declare it non-existent.
259 DirectoryInfo lock_dir_info;
260 lock_dir_info = new DirectoryInfo (LockDirectory);
261 foreach (FileInfo info in lock_dir_info.GetFiles ()) {
262 if (IsDanglingLock (info)) {
263 Logger.Log.Warn ("Found a dangling index lock on {0}", info.FullName);
264 return false;
268 return true;
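// Example of the version check above: a version file containing "14.3"
// parses as major 14, minor 3; a bare "3" (no dot) is treated as major 0,
// minor 3, which fails the MAJOR_VERSION comparison and forces a re-index.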
271 private Lucene.Net.Store.Directory CreateIndex (string path)
273 // Create a directory to put the index in.
274 Directory.CreateDirectory (path);
276 // Create a new store.
277 Lucene.Net.Store.Directory store;
278 store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);
280 // Create an empty index in that store.
281 IndexWriter writer;
282 writer = new IndexWriter (store, null, true);
283 writer.Close ();
285 return store;
288 // Create will kill your index dead. Use it with care.
289 // You don't need to call Open after calling Create.
290 protected void Create ()
292 if (minor_version < 0)
293 minor_version = 0;
295 // Purge any existing directories.
296 if (Directory.Exists (top_dir)) {
297 Logger.Log.Debug ("Purging {0}", top_dir);
298 Directory.Delete (top_dir, true);
301 // Create any necessary directories.
302 Directory.CreateDirectory (top_dir);
303 Directory.CreateDirectory (LockDirectory);
305 // Create the indexes.
306 primary_store = CreateIndex (PrimaryIndexDirectory);
307 secondary_store = CreateIndex (SecondaryIndexDirectory);
309 // Generate and store the index fingerprint.
310 fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
311 TextWriter writer;
312 writer = new StreamWriter (FingerprintFile, false);
313 writer.WriteLine (fingerprint);
314 writer.Close ();
316 // Store our index version information.
317 writer = new StreamWriter (VersionFile, false);
318 writer.WriteLine ("{0}.{1}", MAJOR_VERSION, minor_version);
319 writer.Close ();
322 protected void Open ()
324 Open (false);
327 protected void Open (bool read_only_mode)
329 // Read our index fingerprint.
330 TextReader reader;
331 reader = new StreamReader (FingerprintFile);
332 fingerprint = reader.ReadLine ();
333 reader.Close ();
335 // Create stores for our indexes.
336 primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
337 secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
340 ////////////////////////////////////////////////////////////////
343 // Custom Analyzers
346 private class SingletonTokenStream : TokenStream {
348 private string singleton_str;
350 public SingletonTokenStream (string singleton_str)
352 this.singleton_str = singleton_str;
355 override public Lucene.Net.Analysis.Token Next ()
357 if (singleton_str == null)
358 return null;
360 Lucene.Net.Analysis.Token token;
361 token = new Lucene.Net.Analysis.Token (singleton_str, 0, singleton_str.Length);
363 singleton_str = null;
365 return token;
369 // FIXME: This assumes everything being indexed is in English!
370 private class BeagleAnalyzer : StandardAnalyzer {
372 private char [] buffer = new char [2];
373 private bool strip_extra_property_info = false;
375 public BeagleAnalyzer (bool strip_extra_property_info)
377 this.strip_extra_property_info = strip_extra_property_info;
380 public override TokenStream TokenStream (string fieldName, TextReader reader)
382 bool is_text_prop = false;
384 // Strip off the first two characters in a property.
385 // We store type information in those two characters, so we don't
386 // want to index them.
387 if (fieldName.StartsWith ("prop:")) {
389 if (strip_extra_property_info) {
390 // Skip everything up to and including the first :
391 int c;
392 do {
393 c = reader.Read ();
394 } while (c != -1 && c != ':');
397 is_text_prop = fieldName.StartsWith ("prop:t");
399 // If this is a non-text property, just return one token
400 // containing the entire string. We do this to avoid
401 // tokenizing keywords.
402 if (! is_text_prop)
403 return new SingletonTokenStream (reader.ReadToEnd ());
406 TokenStream outstream;
407 outstream = base.TokenStream (fieldName, reader);
409 if (fieldName == "Text"
410 || fieldName == "HotText"
411 || fieldName == "PropertyText"
412 || is_text_prop) {
413 outstream = new NoiseFilter (outstream);
414 outstream = new PorterStemFilter (outstream);
417 return outstream;
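// Illustrative examples of the analyzer's behavior (values are hypothetical,
// derived from the code above):
//   field "prop:k:dc:creator", value "s:joe"
//       -> not a "prop:t" field: when indexing, everything up to and including
//          the first ':' is skipped and a single token "joe" is emitted
//   fields "Text", "HotText", "PropertyText" and "prop:t:..." properties
//       -> StandardAnalyzer tokens, then NoiseFilter, then Porter stemming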
421 static private Analyzer indexing_analyzer = new BeagleAnalyzer (true);
422 static private Analyzer query_analyzer = new BeagleAnalyzer (false);
424 static protected Analyzer IndexingAnalyzer { get { return indexing_analyzer; } }
425 static protected Analyzer QueryAnalyzer { get { return query_analyzer; } }
427 ////////////////////////////////////////////////////////////////
430 // Dealing with properties
433 static private char TypeToCode (PropertyType type)
435 switch (type) {
436 case PropertyType.Text: return 't';
437 case PropertyType.Keyword: return 'k';
438 case PropertyType.Date: return 'd';
440 throw new Exception ("Bad property type: " + type);
443 static private PropertyType CodeToType (char c)
445 switch (c) {
446 case 't': return PropertyType.Text;
447 case 'k': return PropertyType.Keyword;
448 case 'd': return PropertyType.Date;
451 throw new Exception ("Bad property code: " + c);
454 static private string TypeToWildcardField (PropertyType type)
456 switch (type) {
457 case PropertyType.Text: return "PropertyText";
458 case PropertyType.Keyword: return "PropertyKeyword";
459 case PropertyType.Date: return "PropertyDate";
462 return null;
465 // Exposing this is a little bit suspicious.
466 static protected string PropertyToFieldName (PropertyType type, string key)
468 return String.Format ("prop:{0}:{1}", TypeToCode (type), key);
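// For example (derived from TypeToCode above):
//   PropertyToFieldName (PropertyType.Text,    "dc:title")   => "prop:t:dc:title"
//   PropertyToFieldName (PropertyType.Keyword, "dc:creator") => "prop:k:dc:creator"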
472 static private void AddDateFields (string field_name, Property prop, Document doc)
474 DateTime dt = StringFu.StringToDateTime (prop.Value);
476 Field f;
477 f = new Field ("YM:" + field_name,
478 StringFu.DateTimeToYearMonthString (dt),
479 false, // never store
480 true, // always index
481 false); // never tokenize
482 doc.Add (f);
484 f = new Field ("D:" + field_name,
485 StringFu.DateTimeToDayString (dt),
486 false, // never store
487 true, // always index
488 false); // never tokenize
489 doc.Add (f);
492 static protected void AddPropertyToDocument (Property prop, Document doc)
494 if (prop == null || prop.Value == null)
495 return;
497 // Don't actually put properties in the UnindexedNamespace
498 // in the document. A horrible (and yet lovely!) hack.
499 if (prop.Key.StartsWith (StringFu.UnindexedNamespace))
500 return;
502 Field f;
504 if (prop.IsSearched) {
505 string wildcard_field = TypeToWildcardField (prop.Type);
506 bool tokenize = (prop.Type == PropertyType.Text);
507 if (wildcard_field != null) {
508 f = new Field (wildcard_field,
509 prop.Value,
510 false, // never stored
511 true, // always indexed
512 tokenize);
513 doc.Add (f);
515 if (prop.Type == PropertyType.Date)
516 AddDateFields (wildcard_field, prop, doc);
520 string coded_value;
521 coded_value = String.Format ("{0}:{1}",
522 prop.IsSearched ? 's' : '_',
523 prop.Value);
525 string field_name = PropertyToFieldName (prop.Type, prop.Key);
527 f = new Field (field_name,
528 coded_value,
529 prop.IsStored,
530 true, // always index
531 true); // always tokenize (just strips off type code for keywords)
532 doc.Add (f);
534 if (prop.Type == PropertyType.Date)
535 AddDateFields (field_name, prop, doc);
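// Worked example (hypothetical values): a searched Text property
// dc:title = "Foo Bar" is indexed as
//   an unstored, tokenized "PropertyText" field with value "Foo Bar", plus
//   a "prop:t:dc:title" field whose coded value is "s:Foo Bar"
// (the leading "s:" marks the property as searched; unsearched properties
// are coded "_:"). Date properties also get the "YM:" and "D:" fields
// generated by AddDateFields above.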
538 static protected Property GetPropertyFromDocument (Field f, Document doc, bool from_primary_index)
540 // Note: we don't use the document that we pass in,
541 // but in theory we could. At some later point we
542 // might need to split a property's data across two or
543 // more fields in the document.
545 if (f == null)
546 return null;
548 string field_name;
549 field_name = f.Name ();
550 if (field_name.Length < 7
551 || ! field_name.StartsWith ("prop:"))
552 return null;
554 string field_value;
555 field_value = f.StringValue ();
557 Property prop;
558 prop = new Property ();
559 prop.Type = CodeToType (field_name [5]);
560 prop.Key = field_name.Substring (7);
561 prop.Value = field_value.Substring (2);
562 prop.IsSearched = (field_value [0] == 's');
563 prop.IsMutable = ! from_primary_index;
564 prop.IsStored = f.IsStored ();
566 return prop;
569 //////////////////////////////////////////////////////////////////////////////
572 // Dealing with documents
575 static protected void BuildDocuments (Indexable indexable,
576 out Document primary_doc,
577 out Document secondary_doc)
579 primary_doc = new Document ();
580 secondary_doc = null;
582 Field f;
584 f = Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri));
585 primary_doc.Add (f);
587 if (indexable.ParentUri != null) {
588 f = Field.Keyword ("ParentUri", UriFu.UriToSerializableString (indexable.ParentUri));
589 primary_doc.Add (f);
592 if (indexable.ValidTimestamp) {
593 // Note that we also want to search in the
594 // Timestamp field when we do a wildcard date
595 // query, so that's why we also add a wildcard
596 // field for each item here.
598 string wildcard_field = TypeToWildcardField (PropertyType.Date);
600 string str = StringFu.DateTimeToString (indexable.Timestamp);
601 f = Field.Keyword ("Timestamp", str);
602 primary_doc.Add (f);
603 f = Field.UnStored (wildcard_field, str);
604 primary_doc.Add (f);
606 str = StringFu.DateTimeToYearMonthString (indexable.Timestamp);
607 f = Field.Keyword ("YM:Timestamp", str);
608 primary_doc.Add (f);
609 f = Field.UnStored ("YM:" + wildcard_field, str);
610 primary_doc.Add (f);
612 str = StringFu.DateTimeToDayString (indexable.Timestamp);
613 f = Field.Keyword ("D:Timestamp", str);
614 primary_doc.Add (f);
615 f = Field.UnStored ("D:" + wildcard_field, str);
616 primary_doc.Add (f);
619 if (indexable.NoContent) {
620 // If there is no content, make a note of that
621 // in a special property.
622 Property prop;
623 prop = Property.NewBool ("beagle:NoContent", true);
624 AddPropertyToDocument (prop, primary_doc);
626 } else {
628 // Since we might have content, add our text
629 // readers.
631 TextReader reader;
633 reader = indexable.GetTextReader ();
634 if (reader != null) {
635 f = Field.Text ("Text", reader);
636 primary_doc.Add (f);
639 reader = indexable.GetHotTextReader ();
640 if (reader != null) {
641 f = Field.Text ("HotText", reader);
642 primary_doc.Add (f);
646 // Store the Type and MimeType in special properties
648 if (indexable.HitType != null) {
649 Property prop;
650 prop = Property.NewUnsearched ("beagle:HitType", indexable.HitType);
651 AddPropertyToDocument (prop, primary_doc);
654 if (indexable.MimeType != null) {
655 Property prop;
656 prop = Property.NewUnsearched ("beagle:MimeType", indexable.MimeType);
657 AddPropertyToDocument (prop, primary_doc);
660 if (indexable.Source != null) {
661 Property prop;
662 prop = Property.NewUnsearched ("beagle:Source", indexable.Source);
663 AddPropertyToDocument (prop, primary_doc);
666 // Store the other properties
668 foreach (Property prop in indexable.Properties) {
669 Document target_doc = primary_doc;
670 if (prop.IsMutable) {
671 if (secondary_doc == null) {
672 secondary_doc = new Document ();
673 f = Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri));
674 secondary_doc.Add (f);
676 target_doc = secondary_doc;
679 AddPropertyToDocument (prop, target_doc);
683 static protected Document RewriteDocument (Document old_secondary_doc,
684 Indexable prop_only_indexable)
686 Hashtable seen_props;
687 seen_props = new Hashtable ();
689 Document new_doc;
690 new_doc = new Document ();
692 Field uri_f;
693 uri_f = Field.Keyword ("Uri", UriFu.UriToSerializableString (prop_only_indexable.Uri));
694 new_doc.Add (uri_f);
696 Logger.Log.Debug ("Rewriting {0}", prop_only_indexable.DisplayUri);
698 // Add the new properties to the new document. To
699 // delete a property, set the Value to null... then it
700 // will be added to seen_props (so the old value will
701 // be ignored below), but AddPropertyToDocument will
702 // return w/o doing anything.
703 foreach (Property prop in prop_only_indexable.Properties) {
704 seen_props [prop.Key] = prop;
705 AddPropertyToDocument (prop, new_doc);
706 Logger.Log.Debug ("New prop '{0}' = '{1}'", prop.Key, prop.Value);
709 // Copy the other properties from the old document to the
710 // new one, skipping any properties that we got new values
711 // for out of the Indexable.
712 if (old_secondary_doc != null) {
713 foreach (Field f in old_secondary_doc.Fields ()) {
714 Property prop;
715 prop = GetPropertyFromDocument (f, old_secondary_doc, false);
716 if (prop != null && ! seen_props.Contains (prop.Key)) {
717 Logger.Log.Debug ("Old prop '{0}' = '{1}'", prop.Key, prop.Value);
718 AddPropertyToDocument (prop, new_doc);
723 return new_doc;
726 static protected Uri GetUriFromDocument (Document doc)
728 string uri;
729 uri = doc.Get ("Uri");
730 if (uri == null)
731 throw new Exception ("Got document from Lucene w/o a URI!");
732 return UriFu.UriStringToUri (uri);
735 static protected Hit DocumentToHit (Document doc)
737 Hit hit;
738 hit = new Hit ();
740 hit.Uri = GetUriFromDocument (doc);
742 string str;
743 str = doc.Get ("ParentUri");
744 if (str != null)
745 hit.ParentUri = UriFu.UriStringToUri (str);
747 hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));
749 AddPropertiesToHit (hit, doc, true);
751 // Get the Type and MimeType from the properties.
752 hit.Type = hit.GetFirstProperty ("beagle:HitType");
753 hit.MimeType = hit.GetFirstProperty ("beagle:MimeType");
754 hit.Source = hit.GetFirstProperty ("beagle:Source");
756 return hit;
759 static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
761 foreach (Field f in doc.Fields ()) {
762 Property prop;
763 prop = GetPropertyFromDocument (f, doc, from_primary_index);
764 if (prop != null)
765 hit.AddProperty (prop);
770 //////////////////////////////////////////////////////////////////////////////
773 // Handle the index's item count
776 public int GetItemCount ()
778 if (last_item_count < 0) {
779 IndexReader reader;
780 reader = GetReader (PrimaryStore);
781 last_item_count = reader.NumDocs ();
782 ReleaseReader (reader);
784 return last_item_count;
787 // We should set the cached count of index items when IndexReaders
788 // are open and available, so calls to GetItemCount will return immediately.
790 protected bool HaveItemCount { get { return last_item_count >= 0; } }
792 protected void SetItemCount (IndexReader reader)
794 last_item_count = reader.NumDocs ();
797 public void SetItemCount (int count)
799 last_item_count = count;
802 protected void AdjustItemCount (int delta)
804 if (last_item_count >= 0)
805 last_item_count += delta;
808 //////////////////////////////////////////////////////////////////////////////
811 // Access to the stemmer and list of stop words
814 static PorterStemmer stemmer = new PorterStemmer ();
816 static public string Stem (string str)
818 return stemmer.Stem (str);
821 public static bool IsStopWord (string stemmed_word)
823 return ArrayFu.IndexOfString (StopAnalyzer.ENGLISH_STOP_WORDS, stemmed_word) != -1;
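// Usage sketch (illustrative): Stem ("running") should yield "run" via the
// Porter stemmer, and IsStopWord ("the") is true because "the" appears in
// StopAnalyzer.ENGLISH_STOP_WORDS.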
826 //////////////////////////////////////////////////////////////////////////////
829 // Special Hit Filtering classes
832 static private bool TrueHitFilter (Hit hit)
834 return true;
837 static private HitFilter true_hit_filter = new HitFilter (TrueHitFilter);
839 public class OrHitFilter {
841 private ArrayList all = new ArrayList ();
842 private bool contains_known_true = false;
844 public void Add (HitFilter hit_filter)
846 if (hit_filter == true_hit_filter)
847 contains_known_true = true;
848 all.Add (hit_filter);
851 public bool HitFilter (Hit hit)
853 if (contains_known_true)
854 return true;
855 foreach (HitFilter hit_filter in all)
856 if (hit_filter (hit))
857 return true;
858 return false;
862 public class AndHitFilter {
864 private ArrayList all = new ArrayList ();
866 public void Add (HitFilter hit_filter)
868 all.Add (hit_filter);
871 public bool HitFilter (Hit hit)
873 foreach (HitFilter hit_filter in all)
874 if (! hit_filter (hit))
875 return false;
876 return true;
880 public class NotHitFilter {
881 HitFilter original;
883 public NotHitFilter (HitFilter original)
885 this.original = original;
888 public bool HitFilter (Hit hit)
890 return ! original (hit);
894 //////////////////////////////////////////////////////////////////////////////
897 // Queries
900 static private LNS.Query StringToQuery (string field_name,
901 string text,
902 ArrayList term_list)
904 ArrayList tokens = new ArrayList ();
906 // Use the analyzer to extract the query's tokens.
907 // This code is taken from Lucene's query parser.
908 TokenStream source = QueryAnalyzer.TokenStream (field_name, new StringReader (text));
909 while (true) {
910 Lucene.Net.Analysis.Token token;
911 try {
912 token = source.Next ();
913 if (token == null)
914 break;
915 } catch (IOException) {
916 break;
918 if (token != null)
919 tokens.Add (token.TermText ());
921 try {
922 source.Close ();
923 } catch (IOException) {
924 // ignore
927 if (tokens.Count == 0)
928 return null;
930 LNS.PhraseQuery query = new LNS.PhraseQuery ();
932 foreach (string token in tokens) {
933 Term term;
934 term = new Term (field_name, token);
935 query.Add (term);
936 if (term_list != null)
937 term_list.Add (term);
940 return query;
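// For example (illustrative), StringToQuery ("Text", "running dogs", null)
// runs the text through the query analyzer (giving roughly the stemmed
// tokens "run" and "dog") and returns a PhraseQuery over those terms in the
// "Text" field, or null if every token was filtered out.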
944 // Date Range Handling
947 // This function will break down dates to discrete chunks of
948 // time to avoid expanding RangeQuerys as much as possible.
949 // For example, searching for
951 // YMD(5 May 2005, 16 Oct 2006)
953 // would break down into three queries:
955 // (YM(May 2005) AND D(5,31)) OR
956 // YM(Jun 2005, Sep 2006) OR
957 // (YM(Oct 2006) AND D(1,16))
959 static private DateTime lower_bound = new DateTime (1970, 1, 1);
961 // FIXME: we should probably boost this sometime around 2030.
962 // Mark your calendar.
963 static private DateTime upper_bound = new DateTime (2038, 12, 31);
965 static private Term NewYearMonthTerm (string field_name, int y, int m)
967 return new Term ("YM:" + field_name, String.Format ("{0}{1:00}", y, m));
970 static private LNS.Query NewYearMonthQuery (string field_name, int y, int m)
972 return new LNS.TermQuery (NewYearMonthTerm (field_name, y, m));
975 static private LNS.Query NewYearMonthQuery (string field_name, int y1, int m1, int y2, int m2)
977 return new LNS.RangeQuery (NewYearMonthTerm (field_name, y1, m1),
978 NewYearMonthTerm (field_name, y2, m2),
979 true); // query is inclusive
982 static private Term NewDayTerm (string field_name, int d)
984 return new Term ("D:" + field_name, String.Format ("{0:00}", d));
987 static private LNS.Query NewDayQuery (string field_name, int d1, int d2)
989 return new LNS.RangeQuery (NewDayTerm (field_name, d1),
990 NewDayTerm (field_name, d2),
991 true); // query is inclusive
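// For example, NewYearMonthTerm ("Timestamp", 2005, 5) is the term
// ("YM:Timestamp", "200505") and NewDayTerm ("Timestamp", 5) is
// ("D:Timestamp", "05"); the range queries above are built from pairs of
// such terms and are inclusive on both ends.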
994 private class DateRangeHitFilter {
995 public string Key;
996 public DateTime StartDate;
997 public DateTime EndDate;
999 public bool HitFilter (Hit hit)
1001 // First, check the Timestamp
1002 if (Key == QueryPart_DateRange.AllPropertiesKey
1003 || Key == QueryPart_DateRange.TimestampKey) {
1004 DateTime dt;
1005 dt = hit.Timestamp;
1006 if (StartDate <= dt && dt <= EndDate)
1007 return true;
1008 if (Key == QueryPart_DateRange.TimestampKey)
1009 return false;
1012 if (Key == QueryPart_DateRange.AllPropertiesKey) {
1013 // Walk through all of the properties, and see if any
1014 // date properties fall inside the range.
1015 foreach (Property prop in hit.Properties) {
1016 if (prop.Type == PropertyType.Date) {
1017 DateTime dt;
1018 dt = StringFu.StringToDateTime (prop.Value);
1019 if (StartDate <= dt && dt <= EndDate)
1020 return true;
1023 return false;
1024 } else {
1025 // Walk through all of the properties with the given key,
1026 // and see if any of them fall inside of the range.
1027 string[] values;
1028 values = hit.GetProperties (Key);
1029 foreach (string v in values) {
1030 DateTime dt;
1031 dt = StringFu.StringToDateTime (v);
1032 if (StartDate <= dt && dt <= EndDate)
1033 return true;
1035 return false;
1040 static private LNS.Query GetDateRangeQuery (QueryPart_DateRange part, out HitFilter hit_filter)
1042 string field_name;
1043 if (part.Key == QueryPart_DateRange.AllPropertiesKey)
1044 field_name = TypeToWildcardField (PropertyType.Date);
1045 else if (part.Key == QueryPart_DateRange.TimestampKey)
1046 field_name = "Timestamp";
1047 else
1048 field_name = PropertyToFieldName (PropertyType.Date, part.Key);
1050 // FIXME: We could optimize this and reduce the size of our range
1051 // queries if we actually knew the min and max date that appear in
1052 // any properties in the index. We would need to inspect the index to
1053 // determine that at start-up, and then track it as new documents
1054 // get added to the index.
1055 if (part.StartDate < lower_bound)
1056 part.StartDate = lower_bound;
1057 if (part.EndDate > upper_bound || part.EndDate == DateTime.MinValue)
1058 part.EndDate = upper_bound;
1060 // Swap the start and end dates if they come in reversed.
1061 if (part.StartDate > part.EndDate) {
1062 DateTime swap;
1063 swap = part.StartDate;
1064 part.StartDate = part.EndDate;
1065 part.EndDate = swap;
1068 // Set up our hit filter to cull out the bad dates.
1069 DateRangeHitFilter drhf;
1070 drhf = new DateRangeHitFilter ();
1071 drhf.Key = part.Key;
1072 drhf.StartDate = part.StartDate;
1073 drhf.EndDate = part.EndDate;
1074 hit_filter = new HitFilter (drhf.HitFilter);
1076 Logger.Log.Debug ("Building new date range query");
1077 Logger.Log.Debug ("Start: {0}", part.StartDate);
1078 Logger.Log.Debug ("End: {0}", part.EndDate);
1080 int y1, m1, d1, y2, m2, d2;
1081 y1 = part.StartDate.Year;
1082 m1 = part.StartDate.Month;
1083 d1 = part.StartDate.Day;
1084 y2 = part.EndDate.Year;
1085 m2 = part.EndDate.Month;
1086 d2 = part.EndDate.Day;
1088 LNS.BooleanQuery top_level_query;
1089 top_level_query = new LNS.BooleanQuery ();
1091 // A special case: both the start and the end of our range fall
1092 // in the same month.
1093 if (y1 == y2 && m1 == m2) {
1094 LNS.Query ym_query;
1095 ym_query = NewYearMonthQuery (field_name, y1, m1);
1097 // If our range only covers a part of the month, do a range query on the days.
1098 if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
1099 LNS.BooleanQuery sub_query;
1100 sub_query = new LNS.BooleanQuery ();
1101 sub_query.Add (ym_query, true, false);
1102 sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
1103 top_level_query.Add (sub_query, false, false);
1104 } else {
1105 top_level_query.Add (ym_query, false, false);
1108 } else {
1110 // Handle a partial month at the beginning of our range.
1111 if (d1 > 1) {
1112 LNS.BooleanQuery sub_query;
1113 sub_query = new LNS.BooleanQuery ();
1114 sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
1115 sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
1116 top_level_query.Add (sub_query, false, false);
1118 ++m1;
1119 if (m1 == 13) {
1120 m1 = 1;
1121 ++y1;
1125 // And likewise, handle a partial month at the end of our range.
1126 if (d2 < DateTime.DaysInMonth (y2, m2)) {
1127 LNS.BooleanQuery sub_query;
1128 sub_query = new LNS.BooleanQuery ();
1129 sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
1130 sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
1131 top_level_query.Add (sub_query, false, false);
1133 --m2;
1134 if (m2 == 0) {
1135 m2 = 12;
1136 --y2;
1140 // Generate the query for the "middle" of our period, if it is non-empty
1141 if (y1 < y2 || ((y1 == y2) && m1 <= m2))
1142 top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
1143 false, false);
1146 return top_level_query;
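// Worked example (matching the comment at the top of this section): for a
// range of 5 May 2005 through 16 Oct 2006 over the Timestamp field, the
// query built above is roughly
//
//   (YM:Timestamp:200505 AND D:Timestamp:[05 TO 31]) OR
//    YM:Timestamp:[200506 TO 200609]                 OR
//   (YM:Timestamp:200610 AND D:Timestamp:[01 TO 16])
//
// with the DateRangeHitFilter culling any false positives at the edges.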
1149 // search_subset_uris is a list of Uris that this search should be
1150 // limited to.
1151 static protected void QueryPartToQuery (QueryPart abstract_part,
1152 bool only_build_primary_query,
1153 ArrayList term_list,
1154 out LNS.Query primary_query,
1155 out LNS.Query secondary_query,
1156 out HitFilter hit_filter)
1158 primary_query = null;
1159 secondary_query = null;
1161 // By default, we assume that our lucene queries will return exactly the
1162 // matching set of objects. We need to set the hit filter if further
1163 // refinement of the search results is required. (As in the case of
1164 // date range queries, for example.) We essentially have to do this
1165 // to make OR queries work correctly.
1166 hit_filter = true_hit_filter;
1168 // The exception is when dealing with a prohibited part. Just return
1169 // null for the hit filter in that case. This works since
1170 // prohibited parts are not allowed inside of OR queries.
1171 if (abstract_part.Logic == QueryPartLogic.Prohibited)
1172 hit_filter = null;
1174 if (abstract_part == null)
1175 return;
1177 if (abstract_part is QueryPart_Text) {
1178 QueryPart_Text part = (QueryPart_Text) abstract_part;
1180 if (! (part.SearchFullText || part.SearchTextProperties))
1181 return;
1183 LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
1184 LNS.BooleanQuery s_query = new LNS.BooleanQuery ();
1186 if (part.SearchFullText) {
1187 LNS.Query subquery;
1188 subquery = StringToQuery ("Text", part.Text, term_list);
1189 if (subquery != null)
1190 p_query.Add (subquery, false, false);
1192 // FIXME: HotText is ignored for now!
1193 // subquery = StringToQuery ("HotText", part.Text);
1194 // if (subquery != null)
1195 // p_query.Add (subquery, false, false);
1198 if (part.SearchTextProperties) {
1199 LNS.Query subquery;
1200 subquery = StringToQuery ("PropertyText", part.Text, term_list);
1201 if (subquery != null) {
1202 p_query.Add (subquery, false, false);
1203 // Properties can live in either index
1204 if (! only_build_primary_query)
1205 s_query.Add (subquery.Clone () as LNS.Query, false, false);
1208 Term term;
1209 term = new Term ("PropertyKeyword", part.Text);
1210 // FIXME: terms were already added to term_list above, but they may have been tokenized.
1211 // The term here is the non-tokenized version. Should it be added to term_list too?
1212 // term_list is used to calculate scores.
1213 if (term_list != null)
1214 term_list.Add (term);
1215 subquery = new LNS.TermQuery (term);
1216 p_query.Add (subquery, false, false);
1217 // Properties can live in either index
1218 if (! only_build_primary_query)
1219 s_query.Add (subquery.Clone () as LNS.Query, false, false);
1222 primary_query = p_query;
1223 if (! only_build_primary_query)
1224 secondary_query = s_query;
1226 return;
1229 if (abstract_part is QueryPart_Property) {
1230 QueryPart_Property part = (QueryPart_Property) abstract_part;
1232 string field_name;
1233 if (part.Key == QueryPart_Property.AllProperties) {
1234 field_name = TypeToWildcardField (part.Type);
1235 // FIXME: probably shouldn't just return silently
1236 if (field_name == null)
1237 return;
1238 } else
1239 field_name = PropertyToFieldName (part.Type, part.Key);
1241 if (part.Type == PropertyType.Text)
1242 primary_query = StringToQuery (field_name, part.Value, term_list);
1243 else {
1244 Term term;
1245 term = new Term (field_name, part.Value);
1246 if (term_list != null)
1247 term_list.Add (term);
1248 primary_query = new LNS.TermQuery (term);
1251 // Properties can live in either index
1252 if (! only_build_primary_query && primary_query != null)
1253 secondary_query = primary_query.Clone () as LNS.Query;
1255 return;
1258 if (abstract_part is QueryPart_DateRange) {
1260 QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;
1262 primary_query = GetDateRangeQuery (part, out hit_filter);
1263 // Date properties can live in either index
1264 if (! only_build_primary_query && primary_query != null)
1265 secondary_query = primary_query.Clone () as LNS.Query;
1267 // If this is a prohibited part, invert our hit filter.
1268 if (part.Logic == QueryPartLogic.Prohibited) {
1269 NotHitFilter nhf;
1270 nhf = new NotHitFilter (hit_filter);
1271 hit_filter = new HitFilter (nhf.HitFilter);
1274 return;
1277 if (abstract_part is QueryPart_Or) {
1278 QueryPart_Or part = (QueryPart_Or) abstract_part;
1280 // Assemble a new BooleanQuery combining all of the sub-parts.
1281 LNS.BooleanQuery p_query;
1282 p_query = new LNS.BooleanQuery ();
1284 LNS.BooleanQuery s_query = null;
1285 if (! only_build_primary_query)
1286 s_query = new LNS.BooleanQuery ();
1288 primary_query = p_query;
1289 secondary_query = s_query;
1291 OrHitFilter or_hit_filter = null;
1293 foreach (QueryPart sub_part in part.SubParts) {
1294 LNS.Query p_subq, s_subq;
1295 HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored
1296 // FIXME: Any subpart in an OR which has a hit filter won't work
1297 // correctly, because we can't tell which part of an OR we matched
1298 // against to filter correctly. This affects date range queries.
1299 QueryPartToQuery (sub_part, only_build_primary_query,
1300 term_list,
1301 out p_subq, out s_subq, out sub_hit_filter);
1302 if (p_subq != null)
1303 p_query.Add (p_subq, false, false);
1304 if (s_subq != null)
1305 s_query.Add (s_subq, false, false);
1306 if (sub_hit_filter != null) {
1307 if (or_hit_filter == null)
1308 or_hit_filter = new OrHitFilter ();
1309 or_hit_filter.Add (sub_hit_filter);
1313 if (or_hit_filter != null)
1314 hit_filter = new HitFilter (or_hit_filter.HitFilter);
1316 return;
1319 throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
1322 static protected LNS.Query UriQuery (string field_name, Uri uri)
1324 return new LNS.TermQuery (new Term (field_name, UriFu.UriToSerializableString (uri)));
1327 static protected LNS.Query UriQuery (string field_name, ICollection uri_list)
1329 return UriQuery (field_name, uri_list, null);
1332 static protected LNS.Query UriQuery (string field_name, ICollection uri_list, LNS.Query extra_requirement)
1334 if (uri_list.Count == 0)
1335 return null;
1337 int max_clauses;
1338 max_clauses = LNS.BooleanQuery.GetMaxClauseCount ();
1340 int N;
1341 N = 1 + (uri_list.Count - 1) / max_clauses;
1343 LNS.BooleanQuery top_query;
1344 top_query = new LNS.BooleanQuery ();
1346 int cursor = 0;
1347 if (extra_requirement != null) {
1348 top_query.Add (extra_requirement, true, false);
1349 ++cursor;
1352 ArrayList bottom_queries = null;
1354 if (N > 1) {
1355 bottom_queries = new ArrayList ();
1356 for (int i = 0; i < N; ++i) {
1357 LNS.BooleanQuery bq;
1358 bq = new LNS.BooleanQuery ();
1359 bottom_queries.Add (bq);
1360 top_query.Add (bq, false, false);
1364 foreach (Uri uri in uri_list) {
1365 LNS.Query subquery;
1366 subquery = UriQuery (field_name, uri);
1368 LNS.BooleanQuery target;
1369 if (N == 1)
1370 target = top_query;
1371 else {
1372 target = (LNS.BooleanQuery) bottom_queries [cursor];
1373 ++cursor;
1374 if (cursor >= N)
1375 cursor = 0;
1378 target.Add (subquery, false, false);
1381 return top_query;
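// Note on the splitting above (illustrative arithmetic): Lucene's default
// BooleanQuery clause limit is 1024, so a list of, say, 2500 URIs gives
// N = 1 + 2499 / 1024 = 3 bottom-level BooleanQueries, and the per-URI term
// queries are dealt out to them round-robin to stay under the limit.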
1384 ///////////////////////////////////////////////////////////////////////////////////
1386 public int SegmentCount {
1387 get {
1388 DirectoryInfo dir_info;
1389 int p_count = 0, s_count = 0;
1391 dir_info = new DirectoryInfo (PrimaryIndexDirectory);
1392 foreach (FileInfo file_info in dir_info.GetFiles ())
1393 if (file_info.Extension == ".cfs")
1394 ++p_count;
1396 dir_info = new DirectoryInfo (SecondaryIndexDirectory);
1397 foreach (FileInfo file_info in dir_info.GetFiles ())
1398 if (file_info.Extension == ".cfs")
1399 ++s_count;
1401 return p_count > s_count ? p_count : s_count;
1405 ///////////////////////////////////////////////////////////////////////////////////
1407 // Cache IndexReaders on a per-Lucene index basis, since they
1408 // are extremely expensive to create. Note that using this
1409 // only makes sense in situations where the index might
1410 // change from underneath us, but most of the
1411 // time probably won't. This means it makes sense to do
1412 // this in LuceneQueryingDriver.cs, but it doesn't in
1413 // LuceneIndexingDriver.cs.
1415 private class ReaderAndVersion {
1417 public IndexReader Reader;
1418 public long Version;
1419 public int Refcount;
1421 public ReaderAndVersion (IndexReader reader, long version)
1423 this.Reader = reader;
1424 this.Version = version;
1425 this.Refcount = 1;
1429 static private Hashtable directory_rav_map = new Hashtable ();
1430 static private Hashtable reader_rav_map = new Hashtable ();
1432 static public LNS.IndexSearcher GetSearcher (Lucene.Net.Store.Directory directory)
1434 IndexReader reader = GetReader (directory);
1436 return new LNS.IndexSearcher (reader);
1439 static public IndexReader GetReader (Lucene.Net.Store.Directory directory)
1441 IndexReader reader;
1442 long version;
1444 lock (reader_rav_map) {
1445 ReaderAndVersion rav = (ReaderAndVersion) directory_rav_map [directory];
1447 if (rav == null) {
1448 version = IndexReader.GetCurrentVersion (directory);
1449 reader = IndexReader.Open (directory);
1451 rav = new ReaderAndVersion (reader, version);
1452 rav.Refcount++;
1454 directory_rav_map [directory] = rav;
1455 reader_rav_map [reader] = rav;
1457 return reader;
1460 version = IndexReader.GetCurrentVersion (directory);
1462 if (version != rav.Version) {
1463 UnrefReaderAndVersion_Unlocked (rav);
1465 reader = IndexReader.Open (directory);
1467 rav = new ReaderAndVersion (reader, version);
1468 rav.Refcount++;
1470 directory_rav_map [directory] = rav;
1471 reader_rav_map [reader] = rav;
1472 } else
1473 rav.Refcount++;
1475 return rav.Reader;
1479 static private void UnrefReaderAndVersion_Unlocked (ReaderAndVersion rav)
1481 rav.Refcount--;
1483 if (rav.Refcount == 0) {
1484 rav.Reader.Close ();
1485 reader_rav_map.Remove (rav.Reader);
1489 static public void ReleaseReader (IndexReader reader)
1491 lock (reader_rav_map) {
1492 ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader];
1494 UnrefReaderAndVersion_Unlocked (rav);
1498 static public void ReleaseSearcher (LNS.IndexSearcher searcher)
1500 IndexReader reader = searcher.GetIndexReader ();
1502 searcher.Close ();
1503 ReleaseReader (reader);
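// Typical usage sketch (not from the original file): callers are expected
// to pair these calls up, e.g.
//
//   LNS.IndexSearcher searcher = GetSearcher (PrimaryStore);
//   try {
//       // ... run queries against searcher ...
//   } finally {
//       ReleaseSearcher (searcher);
//   }
//
// so that the cached IndexReader's refcount stays balanced.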
1506 ///////////////////////////////////////////////////////////////////////////////////
1509 // Various ways to grab lots of hits at once.
1510 // These should never be used for querying, only for utility
1511 // functions.
1514 public int GetBlockOfHits (int cookie,
1515 Hit [] block_of_hits)
1517 IndexReader primary_reader;
1518 IndexReader secondary_reader;
1519 primary_reader = GetReader (PrimaryStore);
1520 secondary_reader = GetReader (SecondaryStore);
1522 int request_size;
1523 request_size = block_of_hits.Length;
1524 if (request_size > primary_reader.NumDocs ())
1525 request_size = primary_reader.NumDocs ();
1527 int max_doc;
1528 max_doc = primary_reader.MaxDoc ();
1530 if (cookie < 0) {
1531 Random random;
1532 random = new Random ();
1533 cookie = random.Next (max_doc);
1536 int original_cookie;
1537 original_cookie = cookie;
1539 Hashtable primary_docs, secondary_docs;
1540 primary_docs = UriFu.NewHashtable ();
1541 secondary_docs = UriFu.NewHashtable ();
1543 // Load the primary documents
1544 for (int i = 0; i < request_size; ++i) {
1546 if (! primary_reader.IsDeleted (cookie)) {
1547 Document doc;
1548 doc = primary_reader.Document (cookie);
1549 primary_docs [GetUriFromDocument (doc)] = doc;
1552 ++cookie;
1553 if (cookie >= max_doc) // wrap around
1554 cookie = 0;
1556 // If we somehow end up back where we started,
1557 // give up.
1558 if (cookie == original_cookie)
1559 break;
1562 // If necessary, load the secondary documents
1563 if (secondary_reader != null) {
1564 LNS.IndexSearcher searcher;
1565 searcher = new LNS.IndexSearcher (secondary_reader);
1567 LNS.Query uri_query;
1568 uri_query = UriQuery ("Uri", primary_docs.Keys);
1570 LNS.Hits hits;
1571 hits = searcher.Search (uri_query);
1572 for (int i = 0; i < hits.Length (); ++i) {
1573 Document doc;
1574 doc = hits.Doc (i);
1575 secondary_docs [GetUriFromDocument (doc)] = doc;
1578 searcher.Close ();
1581 ReleaseReader (primary_reader);
1582 ReleaseReader (secondary_reader);
1584 // Now assemble the hits
1585 int j = 0;
1586 foreach (Uri uri in primary_docs.Keys) {
1587 Document primary_doc, secondary_doc;
1588 primary_doc = primary_docs [uri] as Document;
1589 secondary_doc = secondary_docs [uri] as Document;
1591 Hit hit;
1592 hit = DocumentToHit (primary_doc);
1593 if (secondary_doc != null)
1594 AddPropertiesToHit (hit, secondary_doc, false);
1596 block_of_hits [j] = hit;
1597 ++j;
1600 // null-pad the array, if necessary
1601 for (; j < block_of_hits.Length; ++j)
1602 block_of_hits [j] = null;
1605 // Return the new cookie
1606 return cookie;
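// Usage sketch (illustrative): callers walk the index in blocks by feeding
// the returned cookie back in, e.g.
//
//   Hit [] block = new Hit [100];
//   int cookie = -1;              // negative cookie => start at a random document
//   cookie = GetBlockOfHits (cookie, block);
//   // ... consume the non-null entries in block, then call again with cookie ...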
1609 // For a large index, this will be very slow and will consume
1610 // a lot of memory. Don't call it without a good reason!
1611 // We return a hashtable indexed by Uri.
1612 public Hashtable GetAllHitsByUri ()
1614 Hashtable all_hits;
1615 all_hits = UriFu.NewHashtable ();
1617 IndexReader primary_reader;
1618 IndexReader secondary_reader;
1619 primary_reader = GetReader (PrimaryStore);
1620 secondary_reader = GetReader (SecondaryStore);
1622 // Load everything from the primary index
1623 int max_doc;
1624 max_doc = primary_reader.MaxDoc ();
1625 for (int i = 0; i < max_doc; ++i) {
1627 if (primary_reader.IsDeleted (i))
1628 continue;
1630 Document doc;
1631 doc = primary_reader.Document (i);
1633 Hit hit;
1634 hit = DocumentToHit (doc);
1635 all_hits [hit.Uri] = hit;
1638 // Now add in everything from the secondary index, if it exists
1639 if (secondary_reader != null) {
1640 max_doc = secondary_reader.MaxDoc ();
1641 for (int i = 0; i < max_doc; ++i) {
1643 if (secondary_reader.IsDeleted (i))
1644 continue;
1646 Document doc;
1647 doc = secondary_reader.Document (i);
1649 Uri uri;
1650 uri = GetUriFromDocument (doc);
1652 Hit hit;
1653 hit = (Hit) all_hits [uri];
1654 if (hit != null)
1655 AddPropertiesToHit (hit, doc, false);
1659 ReleaseReader (primary_reader);
1660 ReleaseReader (secondary_reader);
1662 return all_hits;