2006-09-10 Francisco Javier F. Serrador <serrador@openshine.com>
[beagle.git] / beagled / Lucene.Net / Search / MultiSearcher.cs
blob694653833ecaa12f24ec0812b32f6eb064087163
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Term = Lucene.Net.Index.Term;
19 namespace Lucene.Net.Search
22 /// <summary>Implements search over a set of <code>Searchables</code>.
23 ///
24 /// <p>Applications usually need only call the inherited {@link #Search(Query)}
25 /// or {@link #Search(Query,Filter)} methods.
26 /// </summary>
27 public class MultiSearcher : Searcher
/// <summary>Hit collector that shifts every collected doc id by a fixed
/// offset before forwarding it to a wrapped collector, so hits from one
/// sub-searcher are reported in the MultiSearcher's global doc-id space.</summary>
private class AnonymousClassHitCollector : HitCollector
{
	private Lucene.Net.Search.HitCollector results; // collector receiving the re-based hits
	private int start;                              // doc-id base of the sub-searcher being searched
	private MultiSearcher enclosingInstance;        // owning MultiSearcher

	public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
	{
		this.results = results;
		this.start = start;
		this.enclosingInstance = enclosingInstance;
	}

	public MultiSearcher Enclosing_Instance
	{
		get { return enclosingInstance; }
	}

	/// <summary>Re-bases <c>doc</c> into the global doc-id space and forwards
	/// it, together with its score, to the wrapped collector.</summary>
	public override void Collect(int doc, float score)
	{
		results.Collect(start + doc, score);
	}
}
/// <summary> Document Frequency cache acting as a Dummy-Searcher.
/// This class is no full-fledged Searcher, but only supports
/// the methods necessary to initialize Weights (DocFreq, DocFreqs,
/// MaxDoc, Rewrite); every other operation throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
private class CachedDfSource : Searcher
{
	private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
	private int maxDoc; // document count

	public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
	{
		this.dfMap = dfMap;
		this.maxDoc = maxDoc;
	}

	/// <summary>Returns the cached document frequency of <c>term</c>.</summary>
	/// <exception cref="System.ArgumentException">if no frequency was cached for the term</exception>
	public override int DocFreq(Term term)
	{
		// Explicit lookup instead of the original catch(NullReferenceException)
		// control flow: a cache miss is an expected condition, not an error,
		// and the old catch also left its exception variable unused.
		object df = dfMap[term];
		if (df == null)
			throw new System.ArgumentException("df for term " + term.Text() + " not available");
		return (System.Int32) df;
	}

	/// <summary>Returns the cached document frequency of each term, in order.</summary>
	public override int[] DocFreqs(Term[] terms)
	{
		int[] result = new int[terms.Length];
		for (int i = 0; i < terms.Length; i++)
		{
			result[i] = DocFreq(terms[i]);
		}
		return result;
	}

	public override int MaxDoc()
	{
		return maxDoc;
	}

	public override Query Rewrite(Query query)
	{
		// this is a bit of a hack. We know that a query which
		// creates a Weight based on this Dummy-Searcher is
		// always already rewritten (see PrepareWeight()).
		// Therefore we just return the unmodified query here
		return query;
	}

	// Everything below is unsupported: this cache only exists to feed
	// aggregate doc freqs into Weight initialization.

	public override void Close()
	{
		throw new System.NotSupportedException();
	}

	public override Document Doc(int i)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Query query, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override Explanation Explain(Weight weight, int doc)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Query query, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override void Search(Weight weight, Filter filter, HitCollector results)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Query query, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopDocs Search(Weight weight, Filter filter, int n)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}

	public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
	{
		throw new System.NotSupportedException();
	}
}
private Lucene.Net.Search.Searchable[] searchables; // the wrapped sub-searchers
private int[] starts;   // starts[i] = first global doc id of searchables[i]; last entry = maxDoc
private int maxDoc = 0; // total document count across all sub-searchers

/// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
{
	this.searchables = searchables;

	// Build the starts table: a running prefix sum of sub-searcher sizes,
	// used to translate between global and per-searcher document numbers.
	starts = new int[searchables.Length + 1];
	int docBase = 0;
	for (int i = 0; i < searchables.Length; i++)
	{
		starts[i] = docBase;
		docBase += searchables[i].MaxDoc();
	}
	starts[searchables.Length] = docBase;
	maxDoc = docBase;
}
/// <summary>Return the array of {@link Searchable}s this searches.
/// The array is the one passed to the constructor, not a copy.</summary>
public virtual Lucene.Net.Search.Searchable[] GetSearchables()
{
	return searchables;
}
/// <summary>Returns the doc-id base table: starts[i] is the first global
/// document number of sub-searcher i; the final entry equals MaxDoc().</summary>
protected internal virtual int[] GetStarts()
{
	return starts;
}
// inherit javadoc
public override void Close()
{
	// Close every sub-searcher in order.  Note there is no try/finally:
	// an exception from one searcher leaves the remaining ones open.
	for (int i = 0; i < searchables.Length; i++)
	{
		searchables[i].Close();
	}
}
/// <summary>Returns the document frequency of <c>term</c>, summed
/// over every sub-searcher.</summary>
public override int DocFreq(Term term)
{
	int total = 0;
	foreach (Lucene.Net.Search.Searchable searchable in searchables)
	{
		total += searchable.DocFreq(term);
	}
	return total;
}
// inherit javadoc
public override Document Doc(int n)
{
	// Locate the owning sub-searcher, then fetch with its local doc id.
	int i = SubSearcher(n);
	return searchables[i].Doc(n - starts[i]);
}
/// <summary>Call {@link #subSearcher} instead.</summary>
/// <deprecated>
/// </deprecated>
public virtual int SearcherIndex(int n)
{
	// Kept only for backward compatibility; forwards unchanged.
	return SubSearcher(n);
}
/// <summary>Returns index of the searcher for document <code>n</code> in the array
/// used to construct this searcher.
/// </summary>
public virtual int SubSearcher(int n)
{
	// Binary search in starts[] for the last entry <= n (the equivalent of
	// Arrays.binarySearch plus a forward scan over empty sub-searchers).
	int low = 0;
	int high = searchables.Length - 1;
	while (low <= high)
	{
		int middle = (low + high) >> 1;
		int midStart = starts[middle];
		if (n < midStart)
		{
			high = middle - 1;
		}
		else if (n > midStart)
		{
			low = middle + 1;
		}
		else
		{
			// Exact hit: empty sub-searchers share the same start value, so
			// advance to the last index with this start before returning.
			while (middle + 1 < searchables.Length && starts[middle + 1] == midStart)
			{
				middle++;
			}
			return middle;
		}
	}
	// No exact match: high is the index of the last start less than n.
	return high;
}
/// <summary>Returns the document number of document <code>n</code> within its
/// sub-index.
/// </summary>
public virtual int SubDoc(int n)
{
	// Subtract the doc-id base of the sub-searcher that owns n.
	return n - starts[SubSearcher(n)];
}
/// <summary>Returns the total document count, i.e. the sum of MaxDoc()
/// over all sub-searchers (computed once in the constructor).</summary>
public override int MaxDoc()
{
	return maxDoc;
}
/// <summary>Builds a weight with aggregated doc freqs for <c>query</c>,
/// then delegates to the Weight-based overload.</summary>
public override TopDocs Search(Query query, Filter filter, int nDocs)
{
	Weight weight = PrepareWeight(query);
	return Search(weight, filter, nDocs);
}
/// <summary>Runs <c>weight</c> against every sub-searcher and merges the
/// per-searcher top docs into a single ranked result, re-basing doc ids
/// into the global doc-id space.</summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
	HitQueue hq = new HitQueue(nDocs);
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		TopDocs docs = searchables[i].Search(weight, filter, nDocs);
		totalHits += docs.totalHits;

		// Merge this searcher's hits into the shared priority queue.
		ScoreDoc[] hits = docs.scoreDocs;
		for (int j = 0; j < hits.Length; j++)
		{
			ScoreDoc hit = hits[j];
			hit.doc += starts[i]; // convert to a global doc id
			if (!hq.Insert(hit))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue into an array, best hit first.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
	{
		merged[i] = (ScoreDoc) hq.Pop();
	}

	return new TopDocs(totalHits, merged);
}
/// <summary>Builds a weight with aggregated doc freqs for <c>query</c>,
/// then delegates to the sorted Weight-based overload.</summary>
public override TopFieldDocs Search(Query query, Filter filter, int n, Sort sort)
{
	Weight weight = PrepareWeight(query);
	return Search(weight, filter, n, sort);
}
/// <summary>Runs <c>weight</c> against every sub-searcher with the given
/// sort and merges the per-searcher results, re-basing doc ids into the
/// global doc-id space.</summary>
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
	// The queue is created lazily from the first sub-searcher's sort fields.
	// NOTE(review): this assumes at least one sub-searcher; with an empty
	// searchables array hq stays null and the drain below would throw.
	FieldDocSortedHitQueue hq = null;
	int totalHits = 0;

	for (int i = 0; i < searchables.Length; i++)
	{
		TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
		if (hq == null)
		{
			hq = new FieldDocSortedHitQueue(docs.fields, n);
		}
		totalHits += docs.totalHits;

		// Merge this searcher's hits into the shared sorted queue.
		ScoreDoc[] hits = docs.scoreDocs;
		for (int j = 0; j < hits.Length; j++)
		{
			ScoreDoc hit = hits[j];
			hit.doc += starts[i]; // convert to a global doc id
			if (!hq.Insert(hit))
			{
				break; // no more scores > minScore
			}
		}
	}

	// Drain the queue into an array, best entry first.
	ScoreDoc[] merged = new ScoreDoc[hq.Size()];
	for (int i = hq.Size() - 1; i >= 0; i--)
	{
		merged[i] = (ScoreDoc) hq.Pop();
	}

	return new TopFieldDocs(totalHits, merged, hq.GetFields());
}
// inherit javadoc
public override void Search(Query query, Filter filter, HitCollector results)
{
	// Build a weight with aggregated doc freqs, then delegate.
	Weight weight = PrepareWeight(query);
	Search(weight, filter, results);
}
// inherit javadoc
public override void Search(Weight weight, Filter filter, HitCollector results)
{
	// Delegate to each sub-searcher, wrapping the caller's collector so
	// collected doc ids are shifted into the global doc-id space.
	for (int i = 0; i < searchables.Length; i++)
	{
		searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, starts[i], this));
	}
}
/// <summary>Rewrites <c>original</c> against every sub-searcher and
/// combines the rewritten forms into a single query via
/// <c>Query.Combine</c> on the first result.</summary>
public override Query Rewrite(Query original)
{
	Query[] rewritten = new Query[searchables.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		rewritten[i] = searchables[i].Rewrite(original);
	}
	return rewritten[0].Combine(rewritten);
}
// inherit javadoc
public override Explanation Explain(Query query, int doc)
{
	// Build a weight with aggregated doc freqs, then delegate.
	Weight weight = PrepareWeight(query);
	return Explain(weight, doc);
}
// inherit javadoc
public override Explanation Explain(Weight weight, int doc)
{
	// Locate the owning sub-searcher and explain with its local doc id.
	int i = SubSearcher(doc);
	return searchables[i].Explain(weight, doc - starts[i]);
}
/// <summary> Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
private Weight PrepareWeight(Query original)
{
	// step 1
	Query rewrittenQuery = Rewrite(original);

	// step 2: the query deposits its terms into the table
	System.Collections.Hashtable terms = new System.Collections.Hashtable();
	rewrittenQuery.ExtractTerms(terms);

	// step 3: aggregate each term's doc freq across all sub-searchers.
	// BUGFIX: enumerate the table's Keys.  Enumerating the Hashtable itself
	// yields DictionaryEntry values, so the old "(Term) e.Current" cast
	// threw InvalidCastException at runtime.
	Term[] allTermsArray = new Term[terms.Count];
	int index = 0;
	foreach (object term in terms.Keys)
	{
		allTermsArray[index++] = (Term) term;
	}

	int[] aggregatedDfs = new int[allTermsArray.Length];
	for (int i = 0; i < searchables.Length; i++)
	{
		int[] dfs = searchables[i].DocFreqs(allTermsArray);
		for (int j = 0; j < aggregatedDfs.Length; j++)
		{
			aggregatedDfs[j] += dfs[j];
		}
	}

	System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
	for (int i = 0; i < allTermsArray.Length; i++)
	{
		dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
	}

	// step4: build the weight against a dummy searcher serving the
	// aggregated dfs and the global document count
	int numDocs = MaxDoc();
	CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

	return rewrittenQuery.Weight(cacheSim);
}