QueryResponses.cs, DumpIndex.cs, IQueryResult.cs, QueryExecutor.cs, QueryResult.cs...
[beagle.git] / beagled / Lucene.Net / Search / MultiSearcher.cs
blob0af1f7db11b67ebe65d37f4c1b4230e4c0d6b2b1
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Document = Lucene.Net.Documents.Document;
19 using Term = Lucene.Net.Index.Term;
21 namespace Lucene.Net.Search
/// <summary>Implements search over a set of <code>Searchables</code>.
///
/// <para>Applications usually need only call the inherited {@link #Search(Query)}
/// or {@link #Search(Query,Filter)} methods.</para>
/// </summary>
29 public class MultiSearcher : Searcher
/// <summary>Hit collector that wraps another collector and shifts every
/// reported document id by a fixed offset, so hits from one sub-searcher
/// land in the composite (multi-searcher) doc-id space.</summary>
private class AnonymousClassHitCollector : HitCollector
{
	private Lucene.Net.Search.HitCollector results;
	private int start;
	private MultiSearcher enclosingInstance;

	public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		InitBlock(results, start, enclosingInstance);
	}

	private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		this.results = results;
		this.start = start;
		this.enclosingInstance = enclosingInstance;
	}

	public MultiSearcher Enclosing_Instance
	{
		get { return enclosingInstance; }
	}

	// Rebase the sub-searcher-relative doc id before forwarding the hit.
	public override void Collect(int doc, float score)
	{
		results.Collect(doc + start, score);
	}
}
/// <summary> Document Frequency cache acting as a Dummy-Searcher.
/// This class is no full-fledged Searcher, but only supports
/// the methods necessary to initialize Weights.
/// </summary>
private class CachedDfSource : Searcher
{
	private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
	private int maxDoc; // aggregate document count across all sub-searchers

	public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
	{
		this.dfMap = dfMap;
		this.maxDoc = maxDoc;
	}

	/// <summary>Returns the cached document frequency for <c>term</c>.</summary>
	/// <exception cref="System.ArgumentException">if the term was not cached</exception>
	public override int DocFreq(Term term)
	{
		// Fix: look the entry up and test for absence explicitly. The old
		// code unboxed a missing (null) Hashtable entry, caught the
		// resulting NullReferenceException, and converted it -- using an
		// exception for expected control flow.
		object df = dfMap[term];
		if (df == null)
			throw new System.ArgumentException("df for term " + term.Text() + " not available");
		return (System.Int32) df;
	}

	public override int[] DocFreqs(Term[] terms)
	{
		int[] result = new int[terms.Length];
		for (int i = 0; i < terms.Length; i++)
		{
			result[i] = DocFreq(terms[i]);
		}
		return result;
	}

	public override int MaxDoc()
	{
		return maxDoc;
	}

	public override Query Rewrite(Query query)
	{
		// this is a bit of a hack. We know that a query which
		// creates a Weight based on this Dummy-Searcher is
		// always already rewritten (see CreateWeight()).
		// Therefore we just return the unmodified query here.
		return query;
	}

	// Everything below is deliberately unsupported: this object exists only
	// to feed aggregated doc freqs into Weight initialization.
	public override void Close()
	{
		throw new System.NotSupportedException();
	}

	public override Document Doc(int i)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Weight weight, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Weight weight, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Weight weight, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}
}
// The sub-searchers this multi-searcher aggregates.
private Lucene.Net.Search.Searchable[] searchables;
// starts[i] is the composite doc id where sub-searcher i begins;
// starts[searchables.Length] holds the total document count.
private int[] starts;
// Sum of the sub-searchers' MaxDoc() values, computed in the constructor.
private int maxDoc = 0;

/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
	this.searchables = searchables;

	// Build the starts array: a running prefix sum of sub-index sizes.
	starts = new int[searchables.Length + 1];
	for (int i = 0; i < searchables.Length; i++)
	{
		starts[i] = maxDoc;
		maxDoc += searchables[i].MaxDoc();
	}
	starts[searchables.Length] = maxDoc;
}
/// <summary>Return the array of {@link Searchable}s this searches. </summary>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
	return searchables;
}
/// <summary>Returns the composite doc-id boundaries, for use by subclasses.</summary>
protected internal virtual int[] GetStarts()
{
	return starts;
}
// inherit javadoc
public override void Close()
{
	// Closing a MultiSearcher closes every underlying sub-searcher.
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		searchable.Close();
	}
}
/// <summary>Aggregate document frequency: the sum of the term's doc
/// frequency across all sub-searchers.</summary>
public override int DocFreq(Term term)
{
	int total = 0;
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		total += searchable.DocFreq(term);
	}
	return total;
}
// inherit javadoc
public override Document Doc(int n)
{
	// Find the sub-searcher that owns doc n, then fetch it using that
	// searcher's own (sub-index-relative) numbering.
	int which = SubSearcher(n);
	return searchables[which].Doc(n - starts[which]);
}
/// <summary>Call {@link #subSearcher} instead.</summary>
/// <deprecated>
/// </deprecated>
// Fix: the deprecation previously lived only in the doc comment; the
// attribute makes the compiler warn callers.
[System.Obsolete("Use SubSearcher(int) instead.")]
public virtual int SearcherIndex(int n)
{
	return SubSearcher(n);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
	// Binary search over the starts array for the last entry <= n.
	int low = 0;
	int high = searchables.Length - 1;
	while (low <= high)
	{
		int mid = (low + high) >> 1;
		int midValue = starts[mid];
		if (midValue > n)
		{
			high = mid - 1;
		}
		else if (midValue < n)
		{
			low = mid + 1;
		}
		else
		{
			// Exact boundary hit: empty sub-indexes can share the same
			// start value, so scan forward to the last match.
			while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
			{
				mid++;
			}
			return mid;
		}
	}
	// No exact match: high now indexes the first element less than n.
	return high;
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
	int which = SubSearcher(n);
	return n - starts[which];
}
/// <summary>Total document count: the sum of the sub-searchers' MaxDoc()
/// values, precomputed in the constructor.</summary>
public override int MaxDoc()
{
	return maxDoc;
}
/// <summary>Searches every sub-index and merges the per-index top docs
/// into a single score-ordered result.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
	HitQueue hq = new HitQueue(nDocs);
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		// Search each sub-index and fold its results into the shared queue.
		TopDocs docs = searchables[i].Search(weight, filter, nDocs);
		totalHits += docs.totalHits;
		ScoreDoc[] subDocs = docs.scoreDocs;
		for (int j = 0; j < subDocs.Length; j++)
		{
			ScoreDoc scoreDoc = subDocs[j];
			scoreDoc.doc += starts[i]; // rebase onto the composite doc-id space
			if (!hq.Insert(scoreDoc))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue from lowest score to highest, filling the array back
	// to front so it ends up in descending score order.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int k = hq.Size() - 1; k >= 0; k--)
	{
		merged[k] = (ScoreDoc) hq.Pop();
	}

	float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : merged[0].score;
	return new TopDocs(totalHits, merged, maxScore);
}
/// <summary>Searches every sub-index with a sort criterion and merges the
/// per-index results into a single sorted TopFieldDocs.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
	// The queue is created lazily from the first sub-result because the
	// resolved sort fields come back with it.
	FieldDocSortedHitQueue hq = null;
	int totalHits = 0;
	float maxScore = System.Single.NegativeInfinity;

	for (int i = 0; i < searchables.Length; i++)
	{
		// search each searcher
		TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
		if (hq == null)
			hq = new FieldDocSortedHitQueue(docs.fields, n);
		totalHits += docs.totalHits;
		maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
		ScoreDoc[] scoreDocs = docs.scoreDocs;
		for (int j = 0; j < scoreDocs.Length; j++)
		{
			// merge scoreDocs into hq
			ScoreDoc scoreDoc = scoreDocs[j];
			scoreDoc.doc += starts[i]; // convert to composite doc id
			if (!hq.Insert(scoreDoc))
				break; // no more scores > minScore
		}
	}

	// Fix: with zero sub-searchers hq was never created and hq.Size()
	// below threw a NullReferenceException; return an empty result instead.
	if (hq == null)
		return new TopFieldDocs(0, new ScoreDoc[0], sort.GetSort(), maxScore);

	// Drain ascending, fill back to front, so the array is sort-ordered.
	ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
		scoreDocs2[i] = (ScoreDoc) hq.Pop();

	return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
}
// inherit javadoc
public override void Search(Weight weight, Filter filter, HitCollector results)
{
	for (int i = 0; i < searchables.Length; i++)
	{
		// Wrap the caller's collector so each sub-searcher's doc ids are
		// shifted into the composite doc-id space before being reported.
		searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, starts[i], this));
	}
}
/// <summary>Rewrites the query against every sub-searcher and combines the
/// per-index rewrites into a single query.</summary>
public override Query Rewrite(Query original)
{
	Query[] rewritten = new Query[searchables.Length];
	for (int i = 0; i < rewritten.Length; i++)
	{
		rewritten[i] = searchables[i].Rewrite(original);
	}
	return rewritten[0].Combine(rewritten);
}
/// <summary>Delegates scoring explanation to the sub-searcher owning the
/// document, translating the composite doc id to its local numbering.</summary>
public override Explanation Explain(Weight weight, int doc)
{
	int which = SubSearcher(doc);
	return searchables[which].Explain(weight, doc - starts[which]);
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
protected internal override Weight CreateWeight(Query original)
{
	// step 1: rewrite against all sub-searchers
	Query rewrittenQuery = Rewrite(original);

	// step 2: collect the terms used by the rewritten query
	System.Collections.Hashtable terms = new System.Collections.Hashtable();
	rewrittenQuery.ExtractTerms(terms);

	// step 3: sum each term's doc freq over all sub-searchers
	Term[] allTermsArray = new Term[terms.Count];
	int index = 0;
	foreach (object key in terms.Keys)
	{
		allTermsArray[index++] = key as Term;
	}

	int[] aggregatedDfs = new int[allTermsArray.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		int[] dfs = searchables[i].DocFreqs(allTermsArray);
		for (int j = 0; j < aggregatedDfs.Length; j++)
		{
			aggregatedDfs[j] += dfs[j];
		}
	}

	System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
	for (int i = 0; i < allTermsArray.Length; i++)
	{
		dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
	}

	// step 4: build the weight against a dummy searcher that serves the
	// aggregated dfs and the combined document count
	int numDocs = MaxDoc();
	CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);
	return rewrittenQuery.Weight(cacheSim);
}