QueryResponses.cs, DumpIndex.cs, IQueryResult.cs, QueryExecutor.cs, QueryResult.cs...
[beagle.git] / beagled / Lucene.Net / Search / MultiSearcher.cs
blob0af1f7db11b67ebe65d37f4c1b4230e4c0d6b2b1
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Document = Lucene.Net.Documents.Document;
19 using Term = Lucene.Net.Index.Term;
21 namespace Lucene.Net.Search
/// <summary>Implements search over a set of <code>Searchables</code>.
///
/// <para>Applications usually need only call the inherited {@link #Search(Query)}
/// or {@link #Search(Query,Filter)} methods.</para>
/// </summary>
29 public class MultiSearcher : Searcher
/// <summary>Hit collector that wraps another collector and shifts every
/// reported document id by a fixed offset, so hits from one sub-searcher
/// land in the composite (multi-searcher) doc-id space.</summary>
private class AnonymousClassHitCollector : HitCollector
{
	private Lucene.Net.Search.HitCollector results;
	private int start;
	private MultiSearcher enclosingInstance;

	public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		InitBlock(results, start, enclosingInstance);
	}

	private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		this.results = results;
		this.start = start;
		this.enclosingInstance = enclosingInstance;
	}

	public MultiSearcher Enclosing_Instance
	{
		get { return enclosingInstance; }
	}

	// Rebase the sub-searcher-relative doc id before forwarding the hit.
	public override void Collect(int doc, float score)
	{
		results.Collect(doc + start, score);
	}
}
/// <summary> Document Frequency cache acting as a Dummy-Searcher.
/// This class is no full-fledged Searcher, but only supports
/// the methods necessary to initialize Weights.
/// </summary>
private class CachedDfSource : Searcher
{
	private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
	private int maxDoc; // aggregate document count across all sub-searchers

	public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
	{
		this.dfMap = dfMap;
		this.maxDoc = maxDoc;
	}

	/// <summary>Returns the cached document frequency for <c>term</c>.</summary>
	/// <exception cref="System.ArgumentException">if the term was not cached</exception>
	public override int DocFreq(Term term)
	{
		// Fix: look the entry up and test for absence explicitly. The old
		// code unboxed a missing (null) Hashtable entry, caught the
		// resulting NullReferenceException, and converted it -- using an
		// exception for expected control flow.
		object df = dfMap[term];
		if (df == null)
			throw new System.ArgumentException("df for term " + term.Text() + " not available");
		return (System.Int32) df;
	}

	public override int[] DocFreqs(Term[] terms)
	{
		int[] result = new int[terms.Length];
		for (int i = 0; i < terms.Length; i++)
		{
			result[i] = DocFreq(terms[i]);
		}
		return result;
	}

	public override int MaxDoc()
	{
		return maxDoc;
	}

	public override Query Rewrite(Query query)
	{
		// this is a bit of a hack. We know that a query which
		// creates a Weight based on this Dummy-Searcher is
		// always already rewritten (see CreateWeight()).
		// Therefore we just return the unmodified query here.
		return query;
	}

	// Everything below is deliberately unsupported: this object exists only
	// to feed aggregated doc freqs into Weight initialization.
	public override void Close()
	{
		throw new System.NotSupportedException();
	}

	public override Document Doc(int i)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Weight weight, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Weight weight, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Weight weight, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}
}
// The sub-searchers this multi-searcher aggregates.
private Lucene.Net.Search.Searchable[] searchables;
// starts[i] is the composite doc id where sub-searcher i begins;
// starts[searchables.Length] holds the total document count.
private int[] starts;
// Sum of the sub-searchers' MaxDoc() values, computed in the constructor.
private int maxDoc = 0;

/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
	this.searchables = searchables;

	// Build the starts array: a running prefix sum of sub-index sizes.
	starts = new int[searchables.Length + 1];
	for (int i = 0; i < searchables.Length; i++)
	{
		starts[i] = maxDoc;
		maxDoc += searchables[i].MaxDoc();
	}
	starts[searchables.Length] = maxDoc;
}
/// <summary>Return the array of {@link Searchable}s this searches. </summary>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
	return searchables;
}
/// <summary>Returns the composite doc-id boundaries, for use by subclasses.</summary>
protected internal virtual int[] GetStarts()
{
	return starts;
}
// inherit javadoc
public override void Close()
{
	// Closing a MultiSearcher closes every underlying sub-searcher.
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		searchable.Close();
	}
}
/// <summary>Aggregate document frequency: the sum of the term's doc
/// frequency across all sub-searchers.</summary>
public override int DocFreq(Term term)
{
	int total = 0;
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		total += searchable.DocFreq(term);
	}
	return total;
}
// inherit javadoc
public override Document Doc(int n)
{
	// Find the sub-searcher that owns doc n, then fetch it using that
	// searcher's own (sub-index-relative) numbering.
	int which = SubSearcher(n);
	return searchables[which].Doc(n - starts[which]);
}
/// <summary>Call {@link #subSearcher} instead.</summary>
/// <deprecated>
/// </deprecated>
// Fix: the deprecation previously lived only in the doc comment; the
// attribute makes the compiler warn callers.
[System.Obsolete("Use SubSearcher(int) instead.")]
public virtual int SearcherIndex(int n)
{
	return SubSearcher(n);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
	// Binary search over the starts array for the last entry <= n.
	int low = 0;
	int high = searchables.Length - 1;
	while (low <= high)
	{
		int mid = (low + high) >> 1;
		int midValue = starts[mid];
		if (midValue > n)
		{
			high = mid - 1;
		}
		else if (midValue < n)
		{
			low = mid + 1;
		}
		else
		{
			// Exact boundary hit: empty sub-indexes can share the same
			// start value, so scan forward to the last match.
			while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
			{
				mid++;
			}
			return mid;
		}
	}
	// No exact match: high now indexes the first element less than n.
	return high;
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
	int which = SubSearcher(n);
	return n - starts[which];
}
/// <summary>Total document count: the sum of the sub-searchers' MaxDoc()
/// values, precomputed in the constructor.</summary>
public override int MaxDoc()
{
	return maxDoc;
}
/// <summary>Searches every sub-index and merges the per-index top docs
/// into a single score-ordered result.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
	HitQueue hq = new HitQueue(nDocs);
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		// Search each sub-index and fold its results into the shared queue.
		TopDocs docs = searchables[i].Search(weight, filter, nDocs);
		totalHits += docs.totalHits;
		ScoreDoc[] subDocs = docs.scoreDocs;
		for (int j = 0; j < subDocs.Length; j++)
		{
			ScoreDoc scoreDoc = subDocs[j];
			scoreDoc.doc += starts[i]; // rebase onto the composite doc-id space
			if (!hq.Insert(scoreDoc))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue from lowest score to highest, filling the array back
	// to front so it ends up in descending score order.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int k = hq.Size() - 1; k >= 0; k--)
	{
		merged[k] = (ScoreDoc) hq.Pop();
	}

	float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : merged[0].score;
	return new TopDocs(totalHits, merged, maxScore);
}
/// <summary>Searches every sub-index with a sort criterion and merges the
/// per-index results into a single sorted TopFieldDocs.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
	// The queue is created lazily from the first sub-result because the
	// resolved sort fields come back with it.
	FieldDocSortedHitQueue hq = null;
	int totalHits = 0;
	float maxScore = System.Single.NegativeInfinity;

	for (int i = 0; i < searchables.Length; i++)
	{
		// search each searcher
		TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
		if (hq == null)
			hq = new FieldDocSortedHitQueue(docs.fields, n);
		totalHits += docs.totalHits;
		maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
		ScoreDoc[] scoreDocs = docs.scoreDocs;
		for (int j = 0; j < scoreDocs.Length; j++)
		{
			// merge scoreDocs into hq
			ScoreDoc scoreDoc = scoreDocs[j];
			scoreDoc.doc += starts[i]; // convert to composite doc id
			if (!hq.Insert(scoreDoc))
				break; // no more scores > minScore
		}
	}

	// Fix: with zero sub-searchers hq was never created and hq.Size()
	// below threw a NullReferenceException; return an empty result instead.
	if (hq == null)
		return new TopFieldDocs(0, new ScoreDoc[0], sort.GetSort(), maxScore);

	// Drain ascending, fill back to front, so the array is sort-ordered.
	ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
		scoreDocs2[i] = (ScoreDoc) hq.Pop();

	return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
}
// inherit javadoc
public override void Search(Weight weight, Filter filter, HitCollector results)
{
	for (int i = 0; i < searchables.Length; i++)
	{
		// Wrap the caller's collector so each sub-searcher's doc ids are
		// shifted into the composite doc-id space before being reported.
		searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, starts[i], this));
	}
}
/// <summary>Rewrites the query against every sub-searcher and combines the
/// per-index rewrites into a single query.</summary>
public override Query Rewrite(Query original)
{
	Query[] rewritten = new Query[searchables.Length];
	for (int i = 0; i < rewritten.Length; i++)
	{
		rewritten[i] = searchables[i].Rewrite(original);
	}
	return rewritten[0].Combine(rewritten);
}
/// <summary>Delegates scoring explanation to the sub-searcher owning the
/// document, translating the composite doc id to its local numbering.</summary>
public override Explanation Explain(Weight weight, int doc)
{
	int which = SubSearcher(doc);
	return searchables[which].Explain(weight, doc - starts[which]);
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
protected internal override Weight CreateWeight(Query original)
{
	// step 1: rewrite against all sub-searchers
	Query rewrittenQuery = Rewrite(original);

	// step 2: collect the terms used by the rewritten query
	System.Collections.Hashtable terms = new System.Collections.Hashtable();
	rewrittenQuery.ExtractTerms(terms);

	// step 3: sum each term's doc freq over all sub-searchers
	Term[] allTermsArray = new Term[terms.Count];
	int index = 0;
	foreach (object key in terms.Keys)
	{
		allTermsArray[index++] = key as Term;
	}

	int[] aggregatedDfs = new int[allTermsArray.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		int[] dfs = searchables[i].DocFreqs(allTermsArray);
		for (int j = 0; j < aggregatedDfs.Length; j++)
		{
			aggregatedDfs[j] += dfs[j];
		}
	}

	System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
	for (int i = 0; i < allTermsArray.Length; i++)
	{
		dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
	}

	// step 4: build the weight against a dummy searcher that serves the
	// aggregated dfs and the combined document count
	int numDocs = MaxDoc();
	CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
	return rewrittenQuery.Weight(cacheSim);
}