/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 using Document
= Lucene
.Net
.Documents
.Document
;
19 using Term
= Lucene
.Net
.Index
.Term
;
namespace Lucene.Net.Search
{
	/// <summary>Implements search over a set of <code>Searchables</code>.
	///
	/// <p>Applications usually need only call the inherited {@link #Search(Query)}
	/// or {@link #Search(Query,Filter)} methods. Document numbers returned by this
	/// searcher are "global": each sub-searcher's doc ids are offset by the number
	/// of documents in the searchers that precede it (see <code>starts</code>).</p>
	/// </summary>
	public class MultiSearcher : Searcher
	{
		/// <summary>Hit collector that shifts each collected doc id by a fixed
		/// <code>start</code> offset before delegating to the wrapped collector,
		/// mapping sub-searcher-local ids into the global id space.
		/// </summary>
		private class AnonymousClassHitCollector : HitCollector
		{
			public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
			{
				InitBlock(results, start, enclosingInstance);
			}

			private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
			{
				this.results = results;
				this.start = start;
				this.enclosingInstance = enclosingInstance;
			}

			private Lucene.Net.Search.HitCollector results; // collector receiving globalized doc ids
			private int start; // doc id offset of the sub-searcher being collected
			private MultiSearcher enclosingInstance;

			public MultiSearcher Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}

			public override void Collect(int doc, float score)
			{
				// Convert the sub-searcher-local doc id to a global one.
				results.Collect(doc + start, score);
			}
		}

		/// <summary> Document Frequency cache acting as a Dummy-Searcher.
		/// This class is no full-fledged Searcher, but only supports
		/// the methods necessary to initialize Weights.
		/// </summary>
		private class CachedDfSource : Searcher
		{
			private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
			private int maxDoc; // document count

			public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
			{
				this.dfMap = dfMap;
				this.maxDoc = maxDoc;
			}

			/// <summary>Returns the cached document frequency for <code>term</code>.</summary>
			/// <exception cref="System.ArgumentException">if the term was not cached</exception>
			public override int DocFreq(Term term)
			{
				int df;
				try
				{
					df = ((System.Int32) dfMap[term]);
				}
				catch (System.NullReferenceException)
				{
					throw new System.ArgumentException("df for term " + term.Text() + " not available");
				}
				return df;
			}

			public override int[] DocFreqs(Term[] terms)
			{
				int[] result = new int[terms.Length];
				for (int i = 0; i < terms.Length; i++)
				{
					result[i] = DocFreq(terms[i]);
				}
				return result;
			}

			public override int MaxDoc()
			{
				return maxDoc;
			}

			public override Query Rewrite(Query query)
			{
				// this is a bit of a hack. We know that a query which
				// creates a Weight based on this Dummy-Searcher is
				// always already rewritten (see preparedWeight()).
				// Therefore we just return the unmodified query here
				return query;
			}

			public override void Close()
			{
				throw new System.NotSupportedException();
			}

			public override Document Doc(int i)
			{
				throw new System.NotSupportedException();
			}

			public override Explanation Explain(Weight weight, int doc)
			{
				throw new System.NotSupportedException();
			}

			public override void Search(Weight weight, Filter filter, HitCollector results)
			{
				throw new System.NotSupportedException();
			}

			public override TopDocs Search(Weight weight, Filter filter, int n)
			{
				throw new System.NotSupportedException();
			}

			public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
			{
				throw new System.NotSupportedException();
			}
		}

		private Lucene.Net.Search.Searchable[] searchables;
		private int[] starts; // starts[i] = first global doc id of searchables[i]; starts[Length] = maxDoc
		private int maxDoc = 0;

		/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
		public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
		{
			this.searchables = searchables;

			starts = new int[searchables.Length + 1]; // build starts array
			for (int i = 0; i < searchables.Length; i++)
			{
				starts[i] = maxDoc;
				maxDoc += searchables[i].MaxDoc(); // compute maxDocs
			}
			starts[searchables.Length] = maxDoc;
		}

		/// <summary>Return the array of {@link Searchable}s this searches. </summary>
		public virtual Lucene.Net.Search.Searchable[] GetSearchables()
		{
			return searchables;
		}

		protected internal virtual int[] GetStarts()
		{
			return starts;
		}

		public override void Close()
		{
			for (int i = 0; i < searchables.Length; i++)
			{
				searchables[i].Close();
			}
		}

		/// <summary>Returns the sum of the document frequencies over all sub-searchers.</summary>
		public override int DocFreq(Term term)
		{
			int docFreq = 0;
			for (int i = 0; i < searchables.Length; i++)
			{
				docFreq += searchables[i].DocFreq(term);
			}
			return docFreq;
		}

		public override Document Doc(int n)
		{
			int i = SubSearcher(n); // find searcher index
			return searchables[i].Doc(n - starts[i]); // dispatch to searcher
		}

		/// <summary>Call {@link #subSearcher} instead.</summary>
		/// <deprecated>
		/// </deprecated>
		public virtual int SearcherIndex(int n)
		{
			return SubSearcher(n);
		}

		/// <summary>Returns index of the searcher for document <code>n</code> in the array
		/// used to construct this searcher.
		/// </summary>
		public virtual int SubSearcher(int n)
		{
			// find searcher for doc n:
			// replace w/ call to Arrays.binarySearch in Java 1.2
			int lo = 0; // search starts array
			int hi = searchables.Length - 1; // for first element less
			// than n, return its index
			while (hi >= lo)
			{
				int mid = (lo + hi) >> 1;
				int midValue = starts[mid];
				if (n < midValue)
					hi = mid - 1;
				else if (n > midValue)
					lo = mid + 1;
				else
				{
					// found a match: several searchers may share a start value
					// when some sub-indexes are empty
					while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
					{
						mid++; // scan to last match
					}
					return mid;
				}
			}
			return hi;
		}

		/// <summary>Returns the document number of document <code>n</code> within its
		/// sub-Index.
		/// </summary>
		public virtual int SubDoc(int n)
		{
			return n - starts[SubSearcher(n)];
		}

		public override int MaxDoc()
		{
			return maxDoc;
		}

		public override TopDocs Search(Weight weight, Filter filter, int nDocs)
		{
			HitQueue hq = new HitQueue(nDocs);
			int totalHits = 0;

			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopDocs docs = searchables[i].Search(weight, filter, nDocs);
				totalHits += docs.totalHits; // update totalHits
				ScoreDoc[] scoreDocs = docs.scoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}

			// Drain the queue into an array ordered best-first.
			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
			{
				// put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			}

			float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;

			return new TopDocs(totalHits, scoreDocs2, maxScore);
		}

		public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
		{
			FieldDocSortedHitQueue hq = null;
			int totalHits = 0;

			float maxScore = System.Single.NegativeInfinity;

			for (int i = 0; i < searchables.Length; i++)
			{
				// search each searcher
				TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
				// The queue is created lazily because the sort fields are only
				// known once the first sub-searcher has returned its results.
				if (hq == null)
					hq = new FieldDocSortedHitQueue(docs.fields, n);
				totalHits += docs.totalHits; // update totalHits
				maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
				ScoreDoc[] scoreDocs = docs.scoreDocs;
				for (int j = 0; j < scoreDocs.Length; j++)
				{
					// merge scoreDocs into hq
					ScoreDoc scoreDoc = scoreDocs[j];
					scoreDoc.doc += starts[i]; // convert doc
					if (!hq.Insert(scoreDoc))
						break; // no more scores > minScore
				}
			}

			ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
			for (int i = hq.Size() - 1; i >= 0; i--)
			{
				// put docs in array
				scoreDocs2[i] = (ScoreDoc) hq.Pop();
			}

			return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
		}

		public override void Search(Weight weight, Filter filter, HitCollector results)
		{
			for (int i = 0; i < searchables.Length; i++)
			{
				int start = starts[i];

				searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
			}
		}

		public override Query Rewrite(Query original)
		{
			Query[] queries = new Query[searchables.Length];
			for (int i = 0; i < searchables.Length; i++)
			{
				queries[i] = searchables[i].Rewrite(original);
			}
			return queries[0].Combine(queries);
		}

		public override Explanation Explain(Weight weight, int doc)
		{
			int i = SubSearcher(doc); // find searcher index
			return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
		}

		/// <summary> Create weight in multiple index scenario.
		///
		/// Distributed query processing is done in the following steps:
		/// 1. rewrite query
		/// 2. extract necessary terms
		/// 3. collect dfs for these terms from the Searchables
		/// 4. create query weight using aggregate dfs.
		/// 5. distribute that weight to Searchables
		/// 6. merge results
		///
		/// Steps 1-4 are done here, 5+6 in the search() methods
		///
		/// </summary>
		/// <returns> rewritten queries
		/// </returns>
		protected internal override Weight CreateWeight(Query original)
		{
			// step 1: rewrite query
			Query rewrittenQuery = Rewrite(original);

			// step 2: extract terms
			System.Collections.Hashtable terms = new System.Collections.Hashtable();
			rewrittenQuery.ExtractTerms(terms);

			// step 3: collect dfs for these terms from all the Searchables
			Term[] allTermsArray = new Term[terms.Count];
			int index = 0;
			System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
			while (e.MoveNext())
			{
				allTermsArray[index++] = e.Current as Term;
			}
			int[] aggregatedDfs = new int[terms.Count];
			for (int i = 0; i < searchables.Length; i++)
			{
				int[] dfs = searchables[i].DocFreqs(allTermsArray);
				for (int j = 0; j < aggregatedDfs.Length; j++)
				{
					aggregatedDfs[j] += dfs[j];
				}
			}

			System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
			for (int i = 0; i < allTermsArray.Length; i++)
			{
				dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
			}

			// step 4: create the weight against a dummy searcher that serves
			// the aggregated dfs and the global document count
			int numDocs = MaxDoc();
			CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

			return rewrittenQuery.Weight(cacheSim);
		}
	}
}