Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / LuceneCommon.cs
blob587769637bcd1d572b4cbc52918a145e80af3282
1 //
2 // LuceneCommon.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Diagnostics;
30 using System.Globalization;
31 using System.IO;
32 using System.Text;
33 using System.Threading;
34 using System.Xml;
35 using System.Xml.Serialization;
37 using Lucene.Net.Analysis;
38 using Lucene.Net.Analysis.Standard;
39 using Lucene.Net.Documents;
40 using Lucene.Net.Index;
41 using Lucene.Net.QueryParsers;
42 using LNS = Lucene.Net.Search;
44 using Beagle.Util;
46 namespace Beagle.Daemon {
48 public class LuceneCommon {
// Predicate over a single Hit.  QueryPartToQuery hands one of these back
// to its caller through an out parameter.
public delegate bool HitFilter (Hit hit);

// Properties whose key starts with this prefix are never written into
// a Lucene document (see AddPropertyToDocument).
public const string UnindexedNamespace = "_unindexed:";
54 // VERSION HISTORY
55 // ---------------
57 // 1: Original
58 // 2: Changed format of timestamp strings
59 // 3: Schema changed to be more Dashboard-Match-like
60 // 4: Schema changed for files to include _Directory property
61 // 5: Changed analyzer to support stemming. Bumped version # to
62 // force everyone to re-index.
63 // 6: lots of schema changes as part of the general refactoring
64 // 7: incremented to force a re-index after our upgrade to lucene 1.4
65 // (in theory the file formats are compatible, we are seeing 'term
66 // out of order' exceptions in some cases)
67 // 8: another forced re-index, this time because of massive changes
68 // in the file system backend (it would be nice to have per-backend
69 // versioning so that we didn't have to purge all indexes just
70 // because one changed)
71 // 9: changed the way properties are stored, changed in conjunction
72 // with sane handling of multiple properties on hits.
73 // 10: changed to support typed and mutable properties
74 // 11: moved mime type and hit type into properties
// Index format version written to and checked against the "version" file.
private const int MAJOR_VERSION = 11;
private int minor_version = 0;

// Name the index was opened with, and the directory that holds it.
private string index_name;
private string top_dir;

// Fingerprint string read from / written to the fingerprint file.
private string fingerprint;

// Cached document count; negative means "not known yet".
private int last_item_count = -1;

// This is the big index, containing document full-texts and
// data that is expensive to index.
private Lucene.Net.Store.Directory primary_store = null;

// This is the small index, containing document info that we
// expect to have change.  Canonical example: file names.
private Lucene.Net.Store.Directory secondary_store = null;
92 //////////////////////////////////////////////////////////////////////////////
// Records the index name and minor version, and works out where the
// index lives on disk: a rooted index_name is used as-is, anything else
// is placed underneath PathFinder.IndexDir.
protected LuceneCommon (string index_name, int minor_version)
{
	this.index_name = index_name;
	this.minor_version = minor_version;

	if (Path.IsPathRooted (index_name))
		this.top_dir = index_name;
	else
		this.top_dir = Path.Combine (PathFinder.IndexDir, index_name);
}
102 //////////////////////////////////////////////////////////////////////////////
// Name this index was constructed with.
protected string IndexName {
	get { return index_name; }
}

// Store backing the primary index; null until Create or Open has run.
public Lucene.Net.Store.Directory PrimaryStore {
	get { return primary_store; }
}

// Store backing the secondary index; null until Create or Open has run.
public Lucene.Net.Store.Directory SecondaryStore {
	get { return secondary_store; }
}

// Fingerprint generated by Create or loaded by Open.
public string Fingerprint {
	get { return fingerprint; }
}

// Directory that contains every file belonging to this index.
public string TopDirectory {
	get { return top_dir; }
}
114 //////////////////////////////////////////////////////////////////////////////
// Text cache associated with this index; starts out as TextCache.UserCache.
protected TextCache text_cache = TextCache.UserCache;

// Read/write access to the text cache used by this index.
public TextCache TextCache {
	get {
		return text_cache;
	}
	set {
		text_cache = value;
	}
}
123 //////////////////////////////////////////////////////////////////////////////
// File holding the "major.minor" version string of the index.
private string VersionFile { get { return Path.Combine (top_dir, "version"); } }

// File holding the index fingerprint.
private string FingerprintFile { get { return Path.Combine (top_dir, "fingerprint"); } }

// Directory of the primary index.  Shouldn't really be public.
public string PrimaryIndexDirectory { get { return Path.Combine (top_dir, "PrimaryIndex"); } }

// Directory of the secondary index.  Shouldn't really be public.
public string SecondaryIndexDirectory { get { return Path.Combine (top_dir, "SecondaryIndex"); } }

// Directory handed to FSDirectory as the lock location for both indexes.
public string LockDirectory { get { return Path.Combine (top_dir, "Locks"); } }
// Returns true only if a complete, version-compatible and unlocked index
// is present under top_dir.  Anything that looks incomplete, unreadable
// as a version, mismatched or left write-locked is reported as
// non-existent so that the caller can rebuild it.
protected bool Exists ()
{
	if (! (Directory.Exists (top_dir)
	       && File.Exists (VersionFile)
	       && File.Exists (FingerprintFile)
	       && Directory.Exists (PrimaryIndexDirectory)
	       && IndexReader.IndexExists (PrimaryIndexDirectory)
	       && Directory.Exists (SecondaryIndexDirectory)
	       && IndexReader.IndexExists (SecondaryIndexDirectory)
	       && Directory.Exists (LockDirectory)))
		return false;

	// Check the index's version number.  If it is wrong,
	// declare the index non-existent.
	string version_str;
	using (StreamReader version_reader = new StreamReader (VersionFile)) {
		version_str = version_reader.ReadLine ();
	}

	// ReadLine returns null for an empty file; treat that like any
	// other unusable version file.
	if (version_str == null) {
		Logger.Log.Debug ("Empty version file in {0}", index_name);
		return false;
	}

	int current_major_version, current_minor_version;
	try {
		int i = version_str.IndexOf ('.');
		if (i != -1) {
			current_major_version = Convert.ToInt32 (version_str.Substring (0, i));
			current_minor_version = Convert.ToInt32 (version_str.Substring (i+1));
		} else {
			// Old-style version files contain a single number.
			current_minor_version = Convert.ToInt32 (version_str);
			current_major_version = 0;
		}
	} catch (FormatException) {
		Logger.Log.Debug ("Unparseable version '{0}' in {1}", version_str, index_name);
		return false;
	} catch (OverflowException) {
		Logger.Log.Debug ("Unparseable version '{0}' in {1}", version_str, index_name);
		return false;
	}

	// A negative minor_version means "accept any minor version".
	if (current_major_version != MAJOR_VERSION
	    || (minor_version >= 0 && current_minor_version != minor_version)) {
		Logger.Log.Debug ("Version mismatch in {0}", index_name);
		Logger.Log.Debug ("Index has version {0}.{1}, expected {2}.{3}",
				  current_major_version, current_minor_version,
				  MAJOR_VERSION, minor_version);
		return false;
	}

	// Check the lock directory: If there is a dangling write lock,
	// assume that the index is corrupted and declare it non-existent.
	DirectoryInfo lock_dir_info = new DirectoryInfo (LockDirectory);
	foreach (FileInfo info in lock_dir_info.GetFiles ()) {
		if (info.Name.IndexOf ("write.lock") != -1)
			return false;
	}

	return true;
}
// Makes the directory at path, opens a fresh FSDirectory on it (locks go
// to LockDirectory) and initialises an empty index inside.  Returns the
// store so the caller can keep using it.
private Lucene.Net.Store.Directory CreateIndex (string path)
{
	Directory.CreateDirectory (path);

	Lucene.Net.Store.Directory new_store =
		Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);

	// Opening a writer in "create" mode and closing it again leaves
	// an empty index behind.
	IndexWriter empty_writer = new IndexWriter (new_store, null, true);
	empty_writer.Close ();

	return new_store;
}
// Create will kill your index dead.  Use it with care.
// You don't need to call Open after calling Create.
//
// Deletes top_dir if it exists, recreates the directory layout, builds two
// empty indexes, and writes a new fingerprint and the current version.
protected void Create ()
{
	// A negative minor version only makes sense as a wildcard when
	// checking; a freshly written index gets minor version 0.
	if (minor_version < 0)
		minor_version = 0;

	// Purge any existing directories.
	if (Directory.Exists (top_dir)) {
		Logger.Log.Debug ("Purging {0}", top_dir);
		Directory.Delete (top_dir, true);
	}

	// Create any necessary directories.
	Directory.CreateDirectory (top_dir);
	Directory.CreateDirectory (LockDirectory);

	// Create the indexes.
	primary_store = CreateIndex (PrimaryIndexDirectory);
	secondary_store = CreateIndex (SecondaryIndexDirectory);

	// Generate and store the index fingerprint.  The using blocks make
	// sure the files are closed even if a write fails.
	fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
	using (TextWriter writer = new StreamWriter (FingerprintFile, false)) {
		writer.WriteLine (fingerprint);
	}

	// Store our index version information.
	using (TextWriter writer = new StreamWriter (VersionFile, false)) {
		writer.WriteLine ("{0}.{1}", MAJOR_VERSION, minor_version);
	}
}
// Opens the existing index in read/write mode.
protected void Open ()
{
	Open (false);
}

// Loads the fingerprint from disk and opens (without creating) the
// primary and secondary stores.  read_only_mode is passed straight
// through to FSDirectory.GetDirectory.
protected void Open (bool read_only_mode)
{
	// Read our index fingerprint.  The using block closes the file
	// even if the read fails.
	using (TextReader reader = new StreamReader (FingerprintFile)) {
		fingerprint = reader.ReadLine ();
	}

	// Create stores for our indexes.
	primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
	secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
}
269 ////////////////////////////////////////////////////////////////
272 // Custom Analyzers
// A TokenStream that yields the string it was constructed with as one
// single token and then reports end-of-stream.
private class SingletonTokenStream : TokenStream {

	// The not-yet-returned string; null once it has been handed out.
	private string singleton_str;

	public SingletonTokenStream (string singleton_str)
	{
		this.singleton_str = singleton_str;
	}

	// First call: a token spanning the whole string.  Later calls: null.
	override public Lucene.Net.Analysis.Token Next ()
	{
		if (singleton_str != null) {
			Lucene.Net.Analysis.Token only_token =
				new Lucene.Net.Analysis.Token (singleton_str, 0, singleton_str.Length);
			singleton_str = null;
			return only_token;
		}

		return null;
	}
}
// FIXME: This assumes everything being indexed is in English!
//
// StandardAnalyzer variant that knows about "prop:" fields: it can skip
// the flag prefix stored in front of property values, returns non-text
// properties as one untokenized token, and adds noise filtering and
// Porter stemming to the text-like fields.
private class BeagleAnalyzer : StandardAnalyzer {

	// True for the indexing analyzer, false for the query analyzer:
	// only indexed values carry the prefix that has to be skipped.
	private bool strip_extra_property_info = false;

	public BeagleAnalyzer (bool strip_extra_property_info)
	{
		this.strip_extra_property_info = strip_extra_property_info;
	}

	public override TokenStream TokenStream (string fieldName, TextReader reader)
	{
		bool is_text_prop = false;

		if (fieldName.StartsWith ("prop:")) {

			if (strip_extra_property_info) {
				// Property values are stored as "<flag>:<value>"
				// (see AddPropertyToDocument).  Skip everything up
				// to and including the first ':' so that the flag
				// is not indexed.
				int c;
				do {
					c = reader.Read ();
				} while (c != -1 && c != ':');
			}

			// NOTE(review): PropertyToFieldName produces "prop:t:",
			// "prop:k:" and "prop:d:" names, so this "prop:_" test
			// looks like it can never be true -- confirm whether
			// "prop:t" was intended.  Left unchanged because it
			// affects what ends up in existing indexes.
			is_text_prop = fieldName.StartsWith ("prop:_");

			// If this is non-text property, just return one token
			// containing the entire string.  We do this to avoid
			// tokenizing keywords.
			if (! is_text_prop)
				return new SingletonTokenStream (reader.ReadToEnd ());
		}

		TokenStream outstream;
		outstream = base.TokenStream (fieldName, reader);

		// Only free-text fields get noise filtering and stemming.
		if (fieldName == "Text"
		    || fieldName == "HotText"
		    || fieldName == "PropertyText"
		    || is_text_prop) {
			outstream = new NoiseFilter (outstream);
			outstream = new PorterStemFilter (outstream);
		}

		return outstream;
	}
}
// Shared analyzer instances.  The indexing analyzer skips the flag prefix
// stored in front of property values; the query analyzer does not.
static private Analyzer indexing_analyzer = new BeagleAnalyzer (true);
static private Analyzer query_analyzer = new BeagleAnalyzer (false);

// Analyzer to use when adding documents to an index.
static protected Analyzer IndexingAnalyzer {
	get { return indexing_analyzer; }
}

// Analyzer to use when turning query text into terms.
static protected Analyzer QueryAnalyzer {
	get { return query_analyzer; }
}
356 ////////////////////////////////////////////////////////////////
359 // Dealing with properties
// Maps a property type to the one-character code embedded in field names.
// Throws for a type that has no code.
static private char TypeToCode (PropertyType type)
{
	if (type == PropertyType.Text)
		return 't';
	if (type == PropertyType.Keyword)
		return 'k';
	if (type == PropertyType.Date)
		return 'd';

	throw new Exception ("Bad property type: " + type);
}
// Inverse of TypeToCode: turns the one-character code found in a field
// name back into a property type.  Throws for an unknown code.
static private PropertyType CodeToType (char c)
{
	if (c == 't')
		return PropertyType.Text;
	if (c == 'k')
		return PropertyType.Keyword;
	if (c == 'd')
		return PropertyType.Date;

	throw new Exception ("Bad property code: " + c);
}
// Name of the catch-all field that collects every searched property of
// the given type, or null if the type has no such field.
static private string TypeToWildcardField (PropertyType type)
{
	if (type == PropertyType.Text)
		return "PropertyText";

	if (type == PropertyType.Date)
		return "PropertyDate";

	// Keyword: wildcard keyword lookups are crack.
	// Anything else has no wildcard field either.
	return null;
}
// Builds the Lucene field name for a property: "prop:<type code>:<key>".
// Exposing this is a little bit suspicious.
static protected string PropertyToFieldName (PropertyType type, string key)
{
	char type_code = TypeToCode (type);
	return String.Format ("prop:{0}:{1}", type_code, key);
}
// Writes one property into doc.  Null properties, properties with a null
// value, and properties in the UnindexedNamespace are skipped.  A searched
// property is also added to the wildcard field for its type, when that
// type has one.
static protected void AddPropertyToDocument (Property prop, Document doc)
{
	if (prop == null || prop.Value == null)
		return;

	// Don't actually put properties in the UnindexedNamespace
	// in the document.  A horrible (and yet lovely!) hack.
	if (prop.Key.StartsWith (UnindexedNamespace))
		return;

	if (prop.IsSearched) {
		string wildcard_field = TypeToWildcardField (prop.Type);
		bool tokenize = (prop.Type == PropertyType.Text);
		if (wildcard_field != null)
			doc.Add (new Field (wildcard_field,
					    prop.Value,
					    false,      // never stored
					    true,       // always indexed
					    tokenize));
	}

	// The stored value is prefixed with a one-character "searched"
	// flag; GetPropertyFromDocument reads it back.
	char searched_flag;
	if (prop.IsSearched)
		searched_flag = 's';
	else
		searched_flag = '_';

	string coded_value = String.Format ("{0}:{1}", searched_flag, prop.Value);

	doc.Add (new Field (PropertyToFieldName (prop.Type, prop.Key),
			    coded_value,
			    true,    // always store
			    true,    // always index
			    true));  // always tokenize (just strips off type code for keywords)
}
// Reconstructs a Property from a "prop:<code>:<key>" field.  Returns null
// for a null field or for a field whose name is not a property name.
// Properties that do not come from the primary index are marked mutable.
//
// Note: we don't use the document that we pass in, but in theory we
// could.  At some later point we might need to split a property's data
// across two or more fields in the document.
static protected Property GetPropertyFromDocument (Field f, Document doc, bool from_primary_index)
{
	if (f == null)
		return null;

	string name = f.Name ();
	bool is_property_field = name.Length >= 7 && name.StartsWith ("prop:");
	if (! is_property_field)
		return null;

	string raw_value = f.StringValue ();

	// Name layout:  "prop:" + type code + ":" + key
	// Value layout: searched flag + ":" + value
	Property result = new Property ();
	result.Type = CodeToType (name [5]);
	result.Key = name.Substring (7);
	result.Value = raw_value.Substring (2);
	result.IsSearched = (raw_value [0] == 's');
	result.IsMutable = ! from_primary_index;

	return result;
}
469 //////////////////////////////////////////////////////////////////////////////
472 // Dealing with documents
// Turns an Indexable into Lucene documents.  primary_doc is always
// created and receives the Uri, optional ParentUri and Timestamp, the
// text readers (or a beagle:NoContent marker), the Type/MimeType
// properties and every immutable property.  secondary_doc stays null
// unless the indexable has at least one mutable property; it then holds
// the Uri plus all mutable properties.
static protected void BuildDocuments (Indexable indexable,
				      out Document primary_doc,
				      out Document secondary_doc)
{
	primary_doc = new Document ();
	secondary_doc = null;

	primary_doc.Add (Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri)));

	if (indexable.ParentUri != null)
		primary_doc.Add (Field.Keyword ("ParentUri", UriFu.UriToSerializableString (indexable.ParentUri)));

	if (indexable.ValidTimestamp) {
		string timestamp_str = StringFu.DateTimeToString (indexable.Timestamp);
		primary_doc.Add (Field.Keyword ("Timestamp", timestamp_str));
	}

	if (! indexable.NoContent) {
		// Since we might have content, add our text readers.
		TextReader text_reader = indexable.GetTextReader ();
		if (text_reader != null)
			primary_doc.Add (Field.Text ("Text", text_reader));

		TextReader hot_reader = indexable.GetHotTextReader ();
		if (hot_reader != null)
			primary_doc.Add (Field.Text ("HotText", hot_reader));
	} else {
		// If there is no content, make a note of that
		// in a special property.
		AddPropertyToDocument (Property.NewBool ("beagle:NoContent", true), primary_doc);
	}

	// Store the Type and MimeType in special properties
	if (indexable.Type != null)
		AddPropertyToDocument (Property.NewKeyword ("beagle:Type", indexable.Type), primary_doc);

	if (indexable.MimeType != null)
		AddPropertyToDocument (Property.NewKeyword ("beagle:MimeType", indexable.MimeType), primary_doc);

	// Store the other properties
	foreach (Property prop in indexable.Properties) {

		if (! prop.IsMutable) {
			AddPropertyToDocument (prop, primary_doc);
			continue;
		}

		// The secondary document is created on demand, the first
		// time a mutable property shows up.
		if (secondary_doc == null) {
			secondary_doc = new Document ();
			secondary_doc.Add (Field.Keyword ("Uri", UriFu.UriToSerializableString (indexable.Uri)));
		}

		AddPropertyToDocument (prop, secondary_doc);
	}
}
// Builds a replacement secondary document: the Uri and properties of
// prop_only_indexable, plus every property of old_secondary_doc whose key
// the indexable did not mention.  old_secondary_doc may be null.
static protected Document RewriteDocument (Document old_secondary_doc,
					   Indexable prop_only_indexable)
{
	Hashtable overridden_keys = new Hashtable ();
	Document rewritten = new Document ();

	rewritten.Add (Field.Keyword ("Uri", UriFu.UriToSerializableString (prop_only_indexable.Uri)));

	Logger.Log.Debug ("Rewriting {0}", prop_only_indexable.DisplayUri);

	// Add the new properties to the new document.  To
	// delete a property, set the Value to null... then it
	// will be recorded as overridden (so the old value will
	// be ignored below), but AddPropertyToDocument will
	// return w/o doing anything.
	foreach (Property new_prop in prop_only_indexable.Properties) {
		overridden_keys [new_prop.Key] = new_prop;
		AddPropertyToDocument (new_prop, rewritten);
		Logger.Log.Debug ("New prop '{0}' = '{1}'", new_prop.Key, new_prop.Value);
	}

	if (old_secondary_doc == null)
		return rewritten;

	// Copy the other properties from the old document to the
	// new one, skipping any properties that we got new values
	// for out of the Indexable.
	foreach (Field old_field in old_secondary_doc.Fields ()) {
		Property old_prop = GetPropertyFromDocument (old_field, old_secondary_doc, false);
		if (old_prop == null || overridden_keys.Contains (old_prop.Key))
			continue;

		Logger.Log.Debug ("Old prop '{0}' = '{1}'", old_prop.Key, old_prop.Value);
		AddPropertyToDocument (old_prop, rewritten);
	}

	return rewritten;
}
// Reads the "Uri" field of doc and converts it back into a Uri.
// Throws if the document has no such field.
static protected Uri GetUriFromDocument (Document doc)
{
	string serialized_uri = doc.Get ("Uri");

	if (serialized_uri != null)
		return UriFu.UriStringToUri (serialized_uri);

	throw new Exception ("Got document from Lucene w/o a URI!");
}
// Converts a primary-index document into a Hit: Uri, optional ParentUri,
// Timestamp and all properties, with Type and MimeType pulled out of the
// beagle:Type and beagle:MimeType properties.
static protected Hit DocumentToHit (Document doc)
{
	Hit result = new Hit ();

	result.Uri = GetUriFromDocument (doc);

	string parent_uri_str = doc.Get ("ParentUri");
	if (parent_uri_str != null)
		result.ParentUri = UriFu.UriStringToUri (parent_uri_str);

	result.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

	result.Source = "lucene";
	result.ScoreRaw = 1.0;

	// Properties have to be attached before Type and MimeType can
	// be looked up, since those are stored as properties.
	AddPropertiesToHit (result, doc, true);

	result.Type = result.GetFirstProperty ("beagle:Type");
	result.MimeType = result.GetFirstProperty ("beagle:MimeType");

	return result;
}
// Attaches every property stored in doc to hit.  Fields that are not
// property fields are skipped.
static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
{
	foreach (Field field in doc.Fields ()) {
		Property extracted = GetPropertyFromDocument (field, doc, from_primary_index);
		if (extracted == null)
			continue;

		hit.AddProperty (extracted);
	}
}
646 //////////////////////////////////////////////////////////////////////////////
649 // Handle the index's item count
// Number of documents in the primary index.  The value is cached in
// last_item_count; an IndexReader is opened only when no count is cached.
public int GetItemCount ()
{
	if (last_item_count < 0) {
		IndexReader reader = IndexReader.Open (PrimaryStore);
		try {
			last_item_count = reader.NumDocs ();
		} finally {
			// Close the reader even if counting failed.
			reader.Close ();
		}
	}

	return last_item_count;
}
663 // We should set the cached count of index items when IndexReaders
664 // are open and available, so calls to GetItemCount will return immediately.
// True once a document count has been cached.
protected bool HaveItemCount {
	get { return last_item_count >= 0; }
}

// Caches the document count reported by an already-open reader.
protected void SetItemCount (IndexReader reader)
{
	last_item_count = reader.NumDocs ();
}

// Shifts the cached count by delta.  Does nothing while no count is
// cached.
protected void AdjustItemCount (int delta)
{
	if (last_item_count < 0)
		return;

	last_item_count += delta;
}
679 //////////////////////////////////////////////////////////////////////////////
682 // Access to the stemmer and list of stop words
// Stemmer instance shared by every caller of Stem.
static PorterStemmer stemmer = new PorterStemmer ();

// Returns the Porter stem of str.
static public string Stem (string str)
{
	string stemmed = stemmer.Stem (str);
	return stemmed;
}

// True if stemmed_word appears in Lucene's English stop word list.
public static bool IsStopWord (string stemmed_word)
{
	int position = ArrayFu.IndexOfString (StopAnalyzer.ENGLISH_STOP_WORDS, stemmed_word);
	return position != -1;
}
697 //////////////////////////////////////////////////////////////////////////////
700 // Queries
// Runs text through the query analyzer for field_name and returns a
// PhraseQuery made of the resulting terms, in order.  Returns null if
// the analyzer produces no tokens.
static private LNS.Query StringToQuery (string field_name, string text)
{
	ArrayList term_texts = new ArrayList ();

	// Use the analyzer to extract the query's tokens.
	// This code is taken from Lucene's query parser.
	TokenStream stream = QueryAnalyzer.TokenStream (field_name, new StringReader (text));

	bool more_tokens = true;
	while (more_tokens) {
		Lucene.Net.Analysis.Token current = null;
		try {
			current = stream.Next ();
		} catch (IOException) {
			// A read failure ends tokenization, exactly
			// like running out of tokens.
		}

		if (current == null)
			more_tokens = false;
		else
			term_texts.Add (current.TermText ());
	}

	try {
		stream.Close ();
	} catch (IOException) {
		// ignore
	}

	if (term_texts.Count == 0)
		return null;

	LNS.PhraseQuery phrase = new LNS.PhraseQuery ();
	foreach (string term_text in term_texts)
		phrase.Add (new Term (field_name, term_text));

	return phrase;
}
// Translates one QueryPart into Lucene queries.
//
// primary_query is the query for the primary index; secondary_query is
// the matching query for the secondary index and is only built when
// only_build_primary_query is false.  All three out parameters are null
// when the part is null, unsupported (date ranges) or yields nothing.
// hit_filter is never set to anything but null here.
// Throws for a QueryPart subclass this method does not know about.
static protected void QueryPartToQuery (QueryPart     abstract_part,
					bool          only_build_primary_query,
					out LNS.Query primary_query,
					out LNS.Query secondary_query,
					out HitFilter hit_filter)
{
	primary_query = null;
	secondary_query = null;
	hit_filter = null;

	if (abstract_part == null)
		return;

	QueryPart_Text text_part = abstract_part as QueryPart_Text;
	if (text_part != null) {

		if (! text_part.SearchFullText && ! text_part.SearchTextProperties)
			return;

		LNS.BooleanQuery text_bool = new LNS.BooleanQuery ();
		primary_query = text_bool;

		if (text_part.SearchFullText) {
			LNS.Query full_text_q = StringToQuery ("Text", text_part.Text);
			if (full_text_q != null)
				text_bool.Add (full_text_q, false, false);

			// FIXME: HotText is ignored for now!
			// subquery = StringToQuery ("HotText", part.Text);
			// if (subquery != null)
			//    p_query.Add (subquery, false, false);
		}

		if (text_part.SearchTextProperties) {
			LNS.Query prop_text_q = StringToQuery ("PropertyText", text_part.Text);
			if (prop_text_q != null) {
				text_bool.Add (prop_text_q, false, false);

				// Properties can live in either index
				if (! only_build_primary_query)
					secondary_query = prop_text_q.Clone () as LNS.Query;
			}
		}

		return;
	}

	QueryPart_Property prop_part = abstract_part as QueryPart_Property;
	if (prop_part != null) {

		string field_name;
		if (prop_part.Key == QueryPart_Property.AllProperties) {
			field_name = TypeToWildcardField (prop_part.Type);
			// FIXME: probably shouldn't just return silently
			if (field_name == null)
				return;
		} else {
			field_name = PropertyToFieldName (prop_part.Type, prop_part.Key);
		}

		// Text values go through the analyzer; everything else
		// is matched as one exact term.
		if (prop_part.Type == PropertyType.Text)
			primary_query = StringToQuery (field_name, prop_part.Value);
		else
			primary_query = new LNS.TermQuery (new Term (field_name, prop_part.Value));

		// Properties can live in either index
		if (! only_build_primary_query && primary_query != null)
			secondary_query = primary_query.Clone () as LNS.Query;

		return;
	}

	if (abstract_part is QueryPart_DateRange) {
		// FIXME: Unsupported
		return;
	}

	QueryPart_Or or_part = abstract_part as QueryPart_Or;
	if (or_part != null) {

		// Assemble a new BooleanQuery combining all of the sub-parts.
		LNS.BooleanQuery primary_bool = new LNS.BooleanQuery ();

		LNS.BooleanQuery secondary_bool = null;
		if (! only_build_primary_query)
			secondary_bool = new LNS.BooleanQuery ();

		primary_query = primary_bool;
		secondary_query = secondary_bool;

		foreach (QueryPart sub_part in or_part.SubParts) {
			LNS.Query sub_primary, sub_secondary;
			HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored
			QueryPartToQuery (sub_part, only_build_primary_query,
					  out sub_primary, out sub_secondary, out sub_hit_filter);
			if (sub_primary != null)
				primary_bool.Add (sub_primary, false, false);
			if (sub_secondary != null)
				secondary_bool.Add (sub_secondary, false, false);
		}

		return;
	}

	throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
}
// Query matching documents whose field_name is exactly the given Uri.
static protected LNS.Query UriQuery (string field_name, Uri uri)
{
	return new LNS.TermQuery (new Term (field_name, UriFu.UriToSerializableString (uri)));
}

// Query matching documents whose field_name is any of the Uris in
// uri_list.  Returns null for an empty list.
static protected LNS.Query UriQuery (string field_name, ICollection uri_list)
{
	return UriQuery (field_name, uri_list, null);
}

// Query matching documents whose field_name is any of the Uris in
// uri_list and which, if extra_requirement is non-null, also match
// extra_requirement.  Returns null for an empty list.
//
// A BooleanQuery may hold at most GetMaxClauseCount () clauses, so a long
// list is spread over several nested BooleanQuery objects.
static protected LNS.Query UriQuery (string field_name, ICollection uri_list, LNS.Query extra_requirement)
{
	if (uri_list.Count == 0)
		return null;

	int max_clauses;
	max_clauses = LNS.BooleanQuery.GetMaxClauseCount ();

	// Number of buckets needed so that no bucket exceeds the limit.
	int N;
	N = 1 + (uri_list.Count - 1) / max_clauses;

	// The disjunction over all Uris.  It is kept separate from
	// extra_requirement so that the extra clause can never push it
	// over the clause limit.
	LNS.BooleanQuery uri_query;
	uri_query = new LNS.BooleanQuery ();

	ArrayList bottom_queries = null;

	if (N > 1) {
		bottom_queries = new ArrayList ();
		for (int i = 0; i < N; ++i) {
			LNS.BooleanQuery bq;
			bq = new LNS.BooleanQuery ();
			bottom_queries.Add (bq);
			uri_query.Add (bq, false, false);
		}
	}

	// Distribute the Uris round-robin over the buckets.
	int cursor = 0;
	foreach (Uri uri in uri_list) {
		LNS.Query subquery;
		subquery = UriQuery (field_name, uri);

		LNS.BooleanQuery target;
		if (N == 1)
			target = uri_query;
		else {
			target = (LNS.BooleanQuery) bottom_queries [cursor];
			++cursor;
			if (cursor >= N)
				cursor = 0;
		}

		target.Add (subquery, false, false);
	}

	if (extra_requirement == null)
		return uri_query;

	// Both clauses are required.  Optional clauses next to a required
	// one would only influence scoring, not which documents match,
	// so the Uri disjunction has to be a required clause of its own.
	LNS.BooleanQuery top_query;
	top_query = new LNS.BooleanQuery ();
	top_query.Add (extra_requirement, true, false);
	top_query.Add (uri_query, true, false);

	return top_query;
}
// Combines a query with optional restrictions; every part that is present
// becomes a required clause of the returned BooleanQuery.
//
// query       is our main query.  A null query yields a null result.
// extra_query is a filtering query used for things like mime-types.
//             It is cloned instead of used directly.  May be null.
// uri_list    is a list of Uris to restrict our query to.  May be null
//             or empty, in which case no Uri restriction is added.
static protected LNS.BooleanQuery LimitQuery (LNS.Query   query,
					      LNS.Query   extra_query,
					      ICollection uri_list)
{
	if (query == null)
		return null;

	LNS.BooleanQuery limited = new LNS.BooleanQuery ();
	limited.Add (query, true, false);

	if (extra_query != null) {
		LNS.Query extra_copy = extra_query.Clone () as LNS.Query;
		limited.Add (extra_copy, true, false);
	}

	bool have_uris = (uri_list != null && uri_list.Count > 0);
	if (have_uris)
		limited.Add (UriQuery ("Uri", uri_list), true, false);

	return limited;
}