2006-09-10 Francisco Javier F. Serrador <serrador@openshine.com>
[beagle.git] / beagled / Lucene.Net / Search / MultiSearcher.cs
blob694653833ecaa12f24ec0812b32f6eb064087163
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Term = Lucene.Net.Index.Term;
19 namespace Lucene.Net.Search
22 /// <summary>Implements search over a set of <code>Searchables</code>.
23 ///
24 /// <p>Applications usually need only call the inherited {@link #Search(Query)}
25 /// or {@link #Search(Query,Filter)} methods.
26 /// </summary>
27 public class MultiSearcher : Searcher
/// <summary>Hit collector that shifts every collected doc id by a fixed
/// offset before forwarding it to a wrapped collector, so hits from one
/// sub-searcher are reported in the MultiSearcher's global doc-id space.</summary>
private class AnonymousClassHitCollector : HitCollector
{
	private Lucene.Net.Search.HitCollector results; // collector receiving the re-based hits
	private int start;                              // doc-id base of the sub-searcher being searched
	private MultiSearcher enclosingInstance;        // owning MultiSearcher

	public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		this.results = results;
		this.start = start;
		this.enclosingInstance = enclosingInstance;
	}

	public MultiSearcher Enclosing_Instance
	{
		get { return enclosingInstance; }
	}

	/// <summary>Re-bases <c>doc</c> into the global doc-id space and forwards
	/// it, together with its score, to the wrapped collector.</summary>
	public override void Collect(int doc, float score)
	{
		results.Collect(start + doc, score);
	}
}
/// <summary> Document Frequency cache acting as a Dummy-Searcher.
/// This class is no full-fledged Searcher, but only supports
/// the methods necessary to initialize Weights (DocFreq, DocFreqs,
/// MaxDoc, Rewrite); every other operation throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
private class CachedDfSource : Searcher
{
	private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
	private int maxDoc; // document count

	public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
	{
		this.dfMap = dfMap;
		this.maxDoc = maxDoc;
	}

	/// <summary>Returns the cached document frequency of <c>term</c>.</summary>
	/// <exception cref="System.ArgumentException">if no frequency was cached for the term</exception>
	public override int DocFreq(Term term)
	{
		// Explicit lookup instead of the original catch(NullReferenceException)
		// control flow: a cache miss is an expected condition, not an error,
		// and the old catch also left its exception variable unused.
		object df = dfMap[term];
		if (df == null)
			throw new System.ArgumentException("df for term " + term.Text() + " not available");
		return (System.Int32) df;
	}

	/// <summary>Returns the cached document frequency of each term, in order.</summary>
	public override int[] DocFreqs(Term[] terms)
	{
		int[] result = new int[terms.Length];
		for (int i = 0; i < terms.Length; i++)
		{
			result[i] = DocFreq(terms[i]);
		}
		return result;
	}

	public override int MaxDoc()
	{
		return maxDoc;
	}

	public override Query Rewrite(Query query)
	{
		// this is a bit of a hack. We know that a query which
		// creates a Weight based on this Dummy-Searcher is
		// always already rewritten (see PrepareWeight()).
		// Therefore we just return the unmodified query here
		return query;
	}

	// Everything below is unsupported: this cache only exists to feed
	// aggregate doc freqs into Weight initialization.

	public override void Close()
	{
		throw new System.NotSupportedException();
	}

	public override Document Doc(int i)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Query query, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Weight weight, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Query query, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Weight weight, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Query query, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Weight weight, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}
}
private Lucene.Net.Search.Searchable[] searchables; // the wrapped sub-searchers
private int[] starts;   // starts[i] = first global doc id of searchables[i]; last entry = maxDoc
private int maxDoc = 0; // total document count across all sub-searchers

/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
	this.searchables = searchables;

	// Build the starts table: a running prefix sum of sub-searcher sizes,
	// used to translate between global and per-searcher document numbers.
	starts = new int[searchables.Length + 1];
	int docBase = 0;
	for (int i = 0; i < searchables.Length; i++)
	{
		starts[i] = docBase;
		docBase += searchables[i].MaxDoc();
	}
	starts[searchables.Length] = docBase;
	maxDoc = docBase;
}
/// <summary>Return the array of {@link Searchable}s this searches.
/// The array is the one passed to the constructor, not a copy.</summary>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
	return searchables;
}
/// <summary>Returns the doc-id base table: starts[i] is the first global
/// document number of sub-searcher i; the final entry equals MaxDoc().</summary>
protected internal virtual int[] GetStarts()
{
	return starts;
}
// inherit javadoc
public override void Close()
{
	// Close every sub-searcher in order.  Note there is no try/finally:
	// an exception from one searcher leaves the remaining ones open.
	for (int i = 0; i < searchables.Length; i++)
	{
		searchables[i].Close();
	}
}
/// <summary>Returns the document frequency of <c>term</c>, summed
/// over every sub-searcher.</summary>
public override int DocFreq(Term term)
{
	int total = 0;
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		total += searchable.DocFreq(term);
	}
	return total;
}
// inherit javadoc
public override Document Doc(int n)
{
	// Locate the owning sub-searcher, then fetch with its local doc id.
	int i = SubSearcher(n);
	return searchables[i].Doc(n - starts[i]);
}
/// <summary>Call {@link #subSearcher} instead.</summary>
/// <deprecated>
/// </deprecated>
public virtual int SearcherIndex(int n)
{
	// Kept only for backward compatibility; forwards unchanged.
	return SubSearcher(n);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
	// Binary search in starts[] for the last entry <= n (the equivalent of
	// Arrays.binarySearch plus a forward scan over empty sub-searchers).
	int low = 0;
	int high = searchables.Length - 1;
	while (low <= high)
	{
		int middle = (low + high) >> 1;
		int midStart = starts[middle];
		if (n < midStart)
		{
			high = middle - 1;
		}
		else if (n > midStart)
		{
			low = middle + 1;
		}
		else
		{
			// Exact hit: empty sub-searchers share the same start value, so
			// advance to the last index with this start before returning.
			while (middle + 1 < searchables.Length && starts[middle + 1] == midStart)
			{
				middle++;
			}
			return middle;
		}
	}
	// No exact match: high is the index of the last start less than n.
	return high;
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
	// Subtract the doc-id base of the sub-searcher that owns n.
	return n - starts[SubSearcher(n)];
}
/// <summary>Returns the total document count, i.e. the sum of MaxDoc()
/// over all sub-searchers (computed once in the constructor).</summary>
public override int MaxDoc()
{
	return maxDoc;
}
/// <summary>Builds a weight with aggregated doc freqs for <c>query</c>,
/// then delegates to the Weight-based overload.</summary>
public override TopDocs Search(Query query, Filter filter, int nDocs)
{
	Weight weight = PrepareWeight(query);
	return Search(weight, filter, nDocs);
}
/// <summary>Runs <c>weight</c> against every sub-searcher and merges the
/// per-searcher top docs into a single ranked result, re-basing doc ids
/// into the global doc-id space.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
	HitQueue hq = new HitQueue(nDocs);
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		TopDocs docs = searchables[i].Search(weight, filter, nDocs);
		totalHits += docs.totalHits;

		// Merge this searcher's hits into the shared priority queue.
		ScoreDoc[] hits = docs.scoreDocs;
		for (int j = 0; j < hits.Length; j++)
		{
			ScoreDoc hit = hits[j];
			hit.doc += starts[i]; // convert to a global doc id
			if (!hq.Insert(hit))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue into an array, best hit first.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
	{
		merged[i] = (ScoreDoc) hq.Pop();
	}

	return new TopDocs(totalHits, merged);
}
/// <summary>Builds a weight with aggregated doc freqs for <c>query</c>,
/// then delegates to the sorted Weight-based overload.</summary>
public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
{
	Weight weight = PrepareWeight(query);
	return Search(weight, filter, n, sort);
}
/// <summary>Runs <c>weight</c> against every sub-searcher with the given
/// sort and merges the per-searcher results, re-basing doc ids into the
/// global doc-id space.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
	// The queue is created lazily from the first sub-searcher's sort fields.
	// NOTE(review): this assumes at least one sub-searcher; with an empty
	// searchables array hq stays null and the drain below would throw.
	FieldDocSortedHitQueue hq = null;
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
		if (hq == null)
		{
			hq = new FieldDocSortedHitQueue(docs.fields, n);
		}
		totalHits += docs.totalHits;

		// Merge this searcher's hits into the shared sorted queue.
		ScoreDoc[] hits = docs.scoreDocs;
		for (int j = 0; j < hits.Length; j++)
		{
			ScoreDoc hit = hits[j];
			hit.doc += starts[i]; // convert to a global doc id
			if (!hq.Insert(hit))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue into an array, best entry first.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
	{
		merged[i] = (ScoreDoc) hq.Pop();
	}

	return new TopFieldDocs(totalHits, merged, hq.GetFields());
}
// inherit javadoc
public override void Search(Query query, Filter filter, HitCollector results)
{
	// Build a weight with aggregated doc freqs, then delegate.
	Weight weight = PrepareWeight(query);
	Search(weight, filter, results);
}
// inherit javadoc
public override void Search(Weight weight, Filter filter, HitCollector results)
{
	// Delegate to each sub-searcher, wrapping the caller's collector so
	// collected doc ids are shifted into the global doc-id space.
	for (int i = 0; i < searchables.Length; i++)
	{
		searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, starts[i], this));
	}
}
/// <summary>Rewrites <c>original</c> against every sub-searcher and
/// combines the rewritten forms into a single query via
/// <c>Query.Combine</c> on the first result.</summary>
public override Query Rewrite(Query original)
{
	Query[] rewritten = new Query[searchables.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		rewritten[i] = searchables[i].Rewrite(original);
	}
	return rewritten[0].Combine(rewritten);
}
// inherit javadoc
public override Explanation Explain(Query query, int doc)
{
	// Build a weight with aggregated doc freqs, then delegate.
	Weight weight = PrepareWeight(query);
	return Explain(weight, doc);
}
// inherit javadoc
public override Explanation Explain(Weight weight, int doc)
{
	// Locate the owning sub-searcher and explain with its local doc id.
	int i = SubSearcher(doc);
	return searchables[i].Explain(weight, doc - starts[i]);
}
/// <summary> Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
private Weight PrepareWeight(Query original)
{
	// step 1
	Query rewrittenQuery = Rewrite(original);

	// step 2: the query deposits its terms into the table
	System.Collections.Hashtable terms = new System.Collections.Hashtable();
	rewrittenQuery.ExtractTerms(terms);

	// step 3: aggregate each term's doc freq across all sub-searchers.
	// BUGFIX: enumerate the table's Keys.  Enumerating the Hashtable itself
	// yields DictionaryEntry values, so the old "(Term) e.Current" cast
	// threw InvalidCastException at runtime.
	Term[] allTermsArray = new Term[terms.Count];
	int index = 0;
	foreach (object term in terms.Keys)
	{
		allTermsArray[index++] = (Term) term;
	}

	int[] aggregatedDfs = new int[allTermsArray.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		int[] dfs = searchables[i].DocFreqs(allTermsArray);
		for (int j = 0; j < aggregatedDfs.Length; j++)
		{
			aggregatedDfs[j] += dfs[j];
		}
	}

	System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
	for (int i = 0; i < allTermsArray.Length; i++)
	{
		dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
	}

	// step4: build the weight against a dummy searcher serving the
	// aggregated dfs and the global document count
	int numDocs = MaxDoc();
	CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

	return rewrittenQuery.Weight(cacheSim);
}