/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;

namespace Lucene.Net.Search
{
    /// <summary>Implements search over a set of <code>Searchables</code>.
    ///
    /// <p>Applications usually need only call the inherited {@link #Search(Query)}
    /// or {@link #Search(Query,Filter)} methods.
    /// </summary>
    public class MultiSearcher : Searcher
    {
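        // A minimal usage sketch (illustrative only: the index paths, field
        // name, analyzer and query text below are assumptions, not part of
        // this class):
        //
        //   Searchable[] searchables = new Searchable[] {
        //       new IndexSearcher("/path/to/index1"),
        //       new IndexSearcher("/path/to/index2")
        //   };
        //   Searcher searcher = new MultiSearcher(searchables);
        //   Hits hits = searcher.Search(QueryParser.Parse("text", "contents", new StandardAnalyzer()));
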
        private class AnonymousClassHitCollector : HitCollector
        {
            public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                InitBlock(results, start, enclosingInstance);
            }

            private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                this.results = results;
                this.start = start;
                this.enclosingInstance = enclosingInstance;
            }

            private Lucene.Net.Search.HitCollector results;
            private int start;
            private MultiSearcher enclosingInstance;

            public MultiSearcher Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

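            // Collect() receives doc ids relative to the sub-searcher that
            // produced them; adding 'start' shifts them into the composite
            // doc id space of the enclosing MultiSearcher.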
            public override void Collect(int doc, float score)
            {
                results.Collect(doc + start, score);
            }
        }

        /// <summary> Document Frequency cache acting as a Dummy-Searcher.
        /// This class is not a full-fledged Searcher; it only supports
        /// the methods necessary to initialize Weights.
        /// </summary>
        private class CachedDfSource : Searcher
        {
            private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
            private int maxDoc; // document count

            public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
            {
                this.dfMap = dfMap;
                this.maxDoc = maxDoc;
            }

            public override int DocFreq(Term term)
            {
                int df;
                try
                {
                    df = ((System.Int32) dfMap[term]);
                }
                catch (System.NullReferenceException)
                {
                    throw new System.ArgumentException("df for term " + term.Text() + " not available");
                }
                return df;
            }

            public override int[] DocFreqs(Term[] terms)
            {
                int[] result = new int[terms.Length];
                for (int i = 0; i < terms.Length; i++)
                {
                    result[i] = DocFreq(terms[i]);
                }
                return result;
            }

            public override int MaxDoc()
            {
                return maxDoc;
            }

            public override Query Rewrite(Query query)
            {
                // This is a bit of a hack. We know that a query which
                // creates a Weight based on this Dummy-Searcher is
                // always already rewritten (see PrepareWeight()),
                // so we just return the unmodified query here.
                return query;
            }

            public override void Close()
            {
                throw new System.NotSupportedException();
            }

            public override Document Doc(int i)
            {
                throw new System.NotSupportedException();
            }

            public override Explanation Explain(Query query, int doc)
            {
                throw new System.NotSupportedException();
            }

            public override Explanation Explain(Weight weight, int doc)
            {
                throw new System.NotSupportedException();
            }

            public override void Search(Query query, Filter filter, HitCollector results)
            {
                throw new System.NotSupportedException();
            }

            public override void Search(Weight weight, Filter filter, HitCollector results)
            {
                throw new System.NotSupportedException();
            }

            public override TopDocs Search(Query query, Filter filter, int n)
            {
                throw new System.NotSupportedException();
            }

            public override TopDocs Search(Weight weight, Filter filter, int n)
            {
                throw new System.NotSupportedException();
            }

            public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
            {
                throw new System.NotSupportedException();
            }

            public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
            {
                throw new System.NotSupportedException();
            }
        }

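        // The composite doc id space: searchables[i] owns the id range
        // [starts[i], starts[i + 1]); starts has searchables.Length + 1
        // entries, the last one equal to maxDoc. For example (assumed sizes),
        // sub-searchers with 10 and 15 documents yield starts == {0, 10, 25}
        // and maxDoc == 25.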
        private Lucene.Net.Search.Searchable[] searchables;
        private int[] starts;
        private int maxDoc = 0;

        /// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
        public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
        {
            this.searchables = searchables;

            starts = new int[searchables.Length + 1]; // build starts array
            for (int i = 0; i < searchables.Length; i++)
            {
                starts[i] = maxDoc;
                maxDoc += searchables[i].MaxDoc(); // compute maxDocs
            }
            starts[searchables.Length] = maxDoc;
        }

        /// <summary>Return the array of {@link Searchable}s this searches. </summary>
        public virtual Lucene.Net.Search.Searchable[] GetSearchables()
        {
            return searchables;
        }

        protected internal virtual int[] GetStarts()
        {
            return starts;
        }

        public override void Close()
        {
            for (int i = 0; i < searchables.Length; i++)
                searchables[i].Close();
        }

        public override int DocFreq(Term term)
        {
            int docFreq = 0;
            for (int i = 0; i < searchables.Length; i++)
                docFreq += searchables[i].DocFreq(term);
            return docFreq;
        }

        public override Document Doc(int n)
        {
            int i = SubSearcher(n); // find searcher index
            return searchables[i].Doc(n - starts[i]); // dispatch to searcher
        }

        /// <summary>Call {@link #SubSearcher} instead.</summary>
        public virtual int SearcherIndex(int n)
        {
            return SubSearcher(n);
        }

        /// <summary>Returns the index of the searcher for document <code>n</code> in the
        /// array used to construct this searcher.
        /// </summary>
        public virtual int SubSearcher(int n)
        {
            // find searcher for doc n: binary search the starts array
            // for the last entry less than or equal to n
            int lo = 0;
            int hi = searchables.Length - 1;
            while (hi >= lo)
            {
                int mid = (lo + hi) >> 1;
                int midValue = starts[mid];
                if (n < midValue)
                    hi = mid - 1;
                else if (n > midValue)
                    lo = mid + 1;
                else
                {
                    // found a match; equal starts mean empty sub-searchers
                    while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
                    {
                        mid++; // scan to last match
                    }
                    return mid;
                }
            }
            return hi;
        }

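        // Worked example (assumed starts == {0, 10, 25}): SubSearcher(17)
        // returns 1, because starts[1] == 10 is the last entry <= 17, and
        // SubDoc(17) then maps 17 to 17 - 10 = 7 within that sub-searcher.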
        /// <summary>Returns the document number of document <code>n</code> within its
        /// sub-index.
        /// </summary>
        public virtual int SubDoc(int n)
        {
            return n - starts[SubSearcher(n)];
        }

        public override int MaxDoc()
        {
            return maxDoc;
        }

        public override TopDocs Search(Query query, Filter filter, int nDocs)
        {
            Weight weight = PrepareWeight(query);
            return Search(weight, filter, nDocs);
        }

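        // The Weight-based overload below fans the search out to every
        // sub-searcher, shifts each returned doc id by starts[i], and funnels
        // the per-searcher results through a single HitQueue, so that only
        // the global top nDocs survive the merge.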
        public override TopDocs Search(Weight weight, Filter filter, int nDocs)
        {
            HitQueue hq = new HitQueue(nDocs);
            int totalHits = 0;

            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopDocs docs = searchables[i].Search(weight, filter, nDocs);
                totalHits += docs.totalHits; // update totalHits
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                scoreDocs2[i] = (ScoreDoc) hq.Pop(); // put docs in array, best first

            return new TopDocs(totalHits, scoreDocs2);
        }

        public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
        {
            Weight weight = PrepareWeight(query);
            return Search(weight, filter, n, sort);
        }

        public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
        {
            FieldDocSortedHitQueue hq = null;
            int totalHits = 0;

            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
                if (hq == null)
                    hq = new FieldDocSortedHitQueue(docs.fields, n);
                totalHits += docs.totalHits; // update totalHits
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                scoreDocs2[i] = (ScoreDoc) hq.Pop(); // put docs in array, best first

            return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields());
        }

        public override void Search(Query query, Filter filter, HitCollector results)
        {
            Weight weight = PrepareWeight(query);
            Search(weight, filter, results);
        }

        public override void Search(Weight weight, Filter filter, HitCollector results)
        {
            for (int i = 0; i < searchables.Length; i++)
            {
                int start = starts[i];
                searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
            }
        }

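        // Each sub-searcher may rewrite the query differently (for example,
        // expanding a multi-term query against its own term dictionary), so
        // Rewrite() below combines the per-searcher rewrites back into a
        // single query.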
        public override Query Rewrite(Query original)
        {
            Query[] queries = new Query[searchables.Length];
            for (int i = 0; i < searchables.Length; i++)
            {
                queries[i] = searchables[i].Rewrite(original);
            }
            return queries[0].Combine(queries);
        }

        public override Explanation Explain(Query query, int doc)
        {
            Weight weight = PrepareWeight(query);
            return Explain(weight, doc);
        }

        public override Explanation Explain(Weight weight, int doc)
        {
            int i = SubSearcher(doc); // find searcher index
            return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
        }

        /// <summary> Distributed query processing is done in the following steps:
        /// 1. rewrite the query
        /// 2. extract the necessary terms
        /// 3. collect dfs for these terms from the Searchables
        /// 4. create the query weight using the aggregate dfs
        /// 5. distribute that weight to the Searchables
        /// 6. merge the results
        ///
        /// Steps 1-4 are done here, 5+6 in the Search() methods.
        /// </summary>
        /// <returns> a Weight for the rewritten query, built from aggregate dfs
        /// </returns>
        private Weight PrepareWeight(Query original)
        {
            // step 1
            Query rewrittenQuery = Rewrite(original);

            // step 2
            System.Collections.Hashtable terms = new System.Collections.Hashtable();
            rewrittenQuery.ExtractTerms(terms);

            // step 3: copy the term set (the Hashtable is used as a set, so
            // enumerate its keys) and sum the dfs over all sub-searchers
            Term[] allTermsArray = new Term[terms.Count];
            int index = 0;
            System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
            while (e.MoveNext())
                allTermsArray[index++] = (Term) e.Current;

            int[] aggregatedDfs = new int[terms.Count];
            for (int i = 0; i < searchables.Length; i++)
            {
                int[] dfs = searchables[i].DocFreqs(allTermsArray);
                for (int j = 0; j < aggregatedDfs.Length; j++)
                {
                    aggregatedDfs[j] += dfs[j];
                }
            }

            System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
            for (int i = 0; i < allTermsArray.Length; i++)
            {
                dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
            }

            // step 4
            int numDocs = MaxDoc();
            CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

            return rewrittenQuery.Weight(cacheSim);
        }
    }
}