cvsimport
[beagle.git] / beagled / Lucene.Net / Index / MultiReader.cs
blob2edfd85c166f1857d911559c4b28497c6cdfb4ea
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Document = Lucene.Net.Documents.Document;
19 using Field = Lucene.Net.Documents.Field;
20 using Directory = Lucene.Net.Store.Directory;
22 namespace Lucene.Net.Index
25 /// <summary>An IndexReader which reads multiple indexes, appending their content.
26 ///
27 /// </summary>
28 /// <version> $Id: MultiReader.cs,v 1.4 2006/10/16 19:36:57 joeshaw Exp $
29 /// </version>
30 public class MultiReader : IndexReader
// The readers being aggregated, in document-numbering order.
private IndexReader[] subReaders;

// starts[i] is the first document number belonging to subReaders[i];
// the extra final slot holds the overall maxDoc (see Initialize).
private int[] starts;

// Concatenated norms per field, keyed by field name.
private System.Collections.Hashtable normsCache = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

// Sum of MaxDoc() over all sub-readers, accumulated in Initialize.
private int maxDoc = 0;

// Cached result of NumDocs(); -1 marks the cache as invalid.
private int numDocs = -1;

// Set when a sub-reader reports deletions or a delete is routed through this reader.
private bool hasDeletions = false;
/// <summary>
/// <p>Builds a MultiReader that aggregates the given set of (sub)readers.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the sub-readers.</p>
/// <p>Closing this MultiReader closes every sub-reader as well.</p>
/// </summary>
/// <param name="subReaders">The readers to aggregate. The base reader is
/// given the directory of the first one, or null when the array is empty.
/// </param>
/// <throws> IOException </throws>
public MultiReader(IndexReader[] subReaders)
    : base(subReaders.Length == 0 ? null : subReaders[0].Directory())
{
    Initialize(subReaders);
}
/// <summary>
/// Builds a MultiReader over the given readers. The directory, segment
/// infos and close-directory flag are handed to the base reader unchanged.
/// </summary>
public /*internal*/ MultiReader(Directory directory, SegmentInfos sis, bool closeDirectory, IndexReader[] subReaders)
    : base(directory, sis, closeDirectory)
{
    Initialize(subReaders);
}
/// <summary>
/// Stores the sub-readers and derives the per-reader document offsets,
/// the combined maxDoc, and the initial hasDeletions flag from them.
/// </summary>
/// <param name="subReaders">The readers to aggregate, in numbering order.</param>
private void Initialize(IndexReader[] subReaders)
{
    this.subReaders = subReaders;

    int count = subReaders.Length;
    starts = new int[count + 1];   // one extra slot for the grand total

    for (int seg = 0; seg < count; seg++)
    {
        IndexReader reader = subReaders[seg];

        starts[seg] = maxDoc;      // first doc number of this segment
        maxDoc += reader.MaxDoc(); // running total of document slots

        if (reader.HasDeletions())
        {
            hasDeletions = true;
        }
    }

    starts[count] = maxDoc;
}
/// <summary>Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    int segment = ReaderIndex(n);          // which sub-reader owns doc n
    int localDoc = n - starts[segment];    // doc number inside that sub-reader
    return subReaders[segment].GetTermFreqVectors(localDoc);
}
/// <summary>
/// Returns the term frequency vector of one field of a document by
/// delegating to the sub-reader that owns the document.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
/// <param name="field">Name of the field whose vector is requested.</param>
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
    int segment = ReaderIndex(n);          // which sub-reader owns doc n
    int localDoc = n - starts[segment];    // doc number inside that sub-reader
    return subReaders[segment].GetTermFreqVector(localDoc, field);
}
/// <summary>
/// Returns the sum of NumDocs() over all sub-readers. The sum is cached
/// in numDocs and recomputed only while the cache holds -1.
/// </summary>
public override int NumDocs()
{
    lock (this)
    {
        if (numDocs != -1)
        {
            return numDocs;                // cache hit
        }

        int total = 0;                     // cache miss -- recompute
        foreach (IndexReader reader in subReaders)
        {
            total += reader.NumDocs();
        }
        numDocs = total;
        return numDocs;
    }
}
/// <summary>Returns the combined maxDoc computed in Initialize.</summary>
public override int MaxDoc()
{
    return this.maxDoc;
}
/// <summary>
/// Returns document <c>n</c> by delegating to the sub-reader that owns it.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
public override Document Document(int n)
{
    int segment = ReaderIndex(n);          // which sub-reader owns doc n
    int localDoc = n - starts[segment];    // doc number inside that sub-reader
    return subReaders[segment].Document(localDoc);
}
/// <summary>
/// Asks the sub-reader that owns document <c>n</c> whether it is deleted.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
public override bool IsDeleted(int n)
{
    int segment = ReaderIndex(n);          // which sub-reader owns doc n
    int localDoc = n - starts[segment];    // doc number inside that sub-reader
    return subReaders[segment].IsDeleted(localDoc);
}
/// <summary>
/// Returns the locally tracked hasDeletions flag; the sub-readers are not
/// queried again here.
/// </summary>
public override bool HasDeletions()
{
    return this.hasDeletions;
}
/// <summary>
/// Deletes document <c>n</c> through the sub-reader that owns it, and
/// marks this reader as having deletions.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
protected internal override void DoDelete(int n)
{
    // Invalidate the NumDocs() cache before touching the sub-reader.
    numDocs = -1;

    int segment = ReaderIndex(n);
    int localDoc = n - starts[segment];
    subReaders[segment].Delete(localDoc);

    hasDeletions = true;
}
/// <summary>
/// Calls UndeleteAll() on every sub-reader, then clears the local
/// hasDeletions flag and invalidates the NumDocs() cache.
/// </summary>
protected internal override void DoUndeleteAll()
{
    foreach (IndexReader reader in subReaders)
    {
        reader.UndeleteAll();
    }

    hasDeletions = false;
    numDocs = -1;   // invalidate cache
}
/// <summary>
/// Binary-searches the starts array for the sub-reader that owns
/// document <c>n</c>.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
/// <returns>Index into subReaders. When <c>n</c> equals a start value shared
/// by several consecutive entries, the last of them is returned.</returns>
private int ReaderIndex(int n)
{
    int low = 0;
    int high = subReaders.Length - 1;

    while (low <= high)
    {
        int probe = (low + high) >> 1;
        int first = starts[probe];

        if (n < first)
        {
            high = probe - 1;
            continue;
        }
        if (n > first)
        {
            low = probe + 1;
            continue;
        }

        // Exact match: entries with the same start value are adjacent,
        // so advance to the last one.
        while (probe + 1 < subReaders.Length && starts[probe + 1] == first)
        {
            probe++;
        }
        return probe;
    }

    // No exact match: high now indexes the greatest start below n.
    return high;
}
/// <summary>
/// Returns true as soon as any sub-reader reports norms for
/// <c>field</c>; false when none does.
/// </summary>
/// <param name="field">Name of the field to check.</param>
public override bool HasNorms(System.String field)
{
    foreach (IndexReader reader in subReaders)
    {
        if (reader.HasNorms(field))
        {
            return true;
        }
    }
    return false;
}
// Lazily created substitute norms, sized to MaxDoc().
private byte[] ones;

/// <summary>
/// Returns the substitute norms array, creating it through
/// SegmentReader.CreateFakeNorms on first use.
/// </summary>
private byte[] FakeNorms()
{
    if (ones != null)
    {
        return ones;
    }
    ones = SegmentReader.CreateFakeNorms(MaxDoc());
    return ones;
}
/// <summary>
/// Returns the norms of <c>field</c> for all documents of this reader.
/// A cached array is returned when present. If no sub-reader has norms
/// for the field the substitute array from FakeNorms() is returned.
/// Otherwise the sub-readers' norms are concatenated into a new array
/// that is cached and returned.
/// </summary>
/// <param name="field">Name of the field whose norms are requested.</param>
public override byte[] Norms(System.String field)
{
    lock (this)
    {
        byte[] cached = (byte[]) normsCache[field];
        if (cached != null)
        {
            return cached;                 // cache hit
        }

        if (!HasNorms(field))
        {
            return FakeNorms();
        }

        byte[] merged = new byte[MaxDoc()];
        for (int seg = 0; seg < subReaders.Length; seg++)
        {
            // each sub-reader writes its norms at its own offset
            subReaders[seg].Norms(field, merged, starts[seg]);
        }
        normsCache[field] = merged;        // update cache
        return merged;
    }
}
/// <summary>
/// Writes the norms of <c>field</c> for all documents of this reader
/// into <c>result</c>, starting at <c>offset</c>.
/// </summary>
/// <remarks>
/// When a cached array exists for the field, or no sub-reader has norms
/// for it (in which case the substitute array from FakeNorms() is used),
/// a single copy fills <c>result</c> and the method returns. Only on a
/// cache miss for a field that does have norms are the sub-readers read,
/// each writing at <c>offset</c> plus its own start position.
/// </remarks>
/// <param name="field">Name of the field whose norms are requested.</param>
/// <param name="result">Destination array; must have room for MaxDoc()
/// bytes from <c>offset</c>.</param>
/// <param name="offset">Index in <c>result</c> at which document 0 of this
/// reader is written.</param>
public override void Norms(System.String field, byte[] result, int offset)
{
    lock (this)
    {
        byte[] bytes = (byte[]) normsCache[field];
        if (bytes == null && !HasNorms(field))
        {
            bytes = FakeNorms();
        }

        if (bytes != null)
        {
            // Cache hit (or substitute norms): result is completely filled
            // by this copy, so there is no need to read every segment
            // again afterwards.
            Array.Copy(bytes, 0, result, offset, MaxDoc());
            return;
        }

        // Cache miss: read from segments.
        for (int i = 0; i < subReaders.Length; i++)
        {
            subReaders[i].Norms(field, result, offset + starts[i]);
        }
    }
}
/// <summary>
/// Sets the norm of one field of document <c>n</c> through the sub-reader
/// that owns it, after dropping this reader's cached norms for the field.
/// </summary>
/// <param name="n">Document number in this reader's combined numbering.</param>
/// <param name="field">Name of the field whose norm is set.</param>
/// <param name="value_Renamed">The encoded norm byte to store.</param>
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
    // The cached concatenation for this field is about to become stale.
    normsCache.Remove(field);

    int segment = ReaderIndex(n);
    int localDoc = n - starts[segment];
    subReaders[segment].SetNorm(localDoc, field, value_Renamed);
}
/// <summary>
/// Returns a MultiTermEnum over all sub-readers, constructed without a
/// start term.
/// </summary>
public override TermEnum Terms()
{
    TermEnum merged = new MultiTermEnum(subReaders, starts, null);
    return merged;
}
/// <summary>
/// Returns a MultiTermEnum over all sub-readers, constructed with
/// <c>term</c> as its start term.
/// </summary>
/// <param name="term">The term passed to each sub-reader's Terms(Term).</param>
public override TermEnum Terms(Term term)
{
    TermEnum merged = new MultiTermEnum(subReaders, starts, term);
    return merged;
}
/// <summary>
/// Returns the sum of DocFreq(t) over all sub-readers.
/// </summary>
/// <param name="t">The term whose document frequency is requested.</param>
public override int DocFreq(Term t)
{
    int sum = 0;
    foreach (IndexReader reader in subReaders)
    {
        sum += reader.DocFreq(t);
    }
    return sum;
}
/// <summary>
/// Returns a new MultiTermDocs spanning all sub-readers.
/// </summary>
public override TermDocs TermDocs()
{
    TermDocs merged = new MultiTermDocs(subReaders, starts);
    return merged;
}
/// <summary>
/// Returns a new MultiTermPositions spanning all sub-readers.
/// </summary>
public override TermPositions TermPositions()
{
    TermPositions merged = new MultiTermPositions(subReaders, starts);
    return merged;
}
/// <summary>Calls Commit() on every sub-reader, in order.</summary>
protected internal override void DoCommit()
{
    foreach (IndexReader reader in subReaders)
    {
        reader.Commit();
    }
}
/// <summary>
/// Calls Close() on every sub-reader, in order, while holding the lock
/// on this reader.
/// </summary>
protected internal override void DoClose()
{
    lock (this)
    {
        foreach (IndexReader reader in subReaders)
        {
            reader.Close();
        }
    }
}
/// <summary>
/// Returns the union of the field names reported by all sub-readers.
/// The result is a Hashtable in which each name is both key and value.
/// </summary>
/// <seealso cref="IndexReader.GetFieldNames()">
/// </seealso>
public override System.Collections.ICollection GetFieldNames()
{
    // Keyed by field name so that duplicates across sub-readers collapse.
    System.Collections.Hashtable unique = new System.Collections.Hashtable();

    foreach (IndexReader reader in subReaders)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry.
        foreach (System.Collections.DictionaryEntry entry in reader.GetFieldNames())
        {
            System.String name = entry.Key.ToString();
            if (!unique.ContainsKey(name))
            {
                unique.Add(name, name);
            }
        }
    }

    return unique;
}
/// <summary>
/// Returns the union of the field names that the sub-readers report for
/// the given <c>indexed</c> flag. The result is a Hashtable in which each
/// name is both key and value.
/// </summary>
/// <param name="indexed">Passed unchanged to each sub-reader's GetFieldNames(bool).</param>
/// <seealso cref="IndexReader.GetFieldNames(bool)">
/// </seealso>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    // Keyed by field name so that duplicates across sub-readers collapse.
    System.Collections.Hashtable unique = new System.Collections.Hashtable();

    foreach (IndexReader reader in subReaders)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry.
        foreach (System.Collections.DictionaryEntry entry in reader.GetFieldNames(indexed))
        {
            System.String name = entry.Key.ToString();
            if (!unique.ContainsKey(name))
            {
                unique.Add(name, name);
            }
        }
    }

    return unique;
}
/// <summary>
/// Returns the union of the field names that the sub-readers report for
/// the given term-vector specification. The result is a Hashtable in
/// which each name is both key and value.
/// </summary>
/// <param name="tvSpec">Passed unchanged to each sub-reader's GetIndexedFieldNames.</param>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    // Keyed by field name so that duplicates across sub-readers collapse.
    System.Collections.Hashtable unique = new System.Collections.Hashtable();

    foreach (IndexReader reader in subReaders)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry.
        foreach (System.Collections.DictionaryEntry entry in reader.GetIndexedFieldNames(tvSpec))
        {
            System.String name = entry.Key.ToString();
            if (!unique.ContainsKey(name))
            {
                unique.Add(name, name);
            }
        }
    }

    return unique;
}
/// <summary>
/// Returns the union of the field names that the sub-readers report for
/// the given field option. The result is a Hashtable in which each name
/// is both key and value.
/// </summary>
/// <param name="fieldNames">Passed unchanged to each sub-reader's GetFieldNames(FieldOption).</param>
/// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)">
/// </seealso>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
{
    // Keyed by field name so that duplicates across sub-readers collapse.
    System.Collections.Hashtable unique = new System.Collections.Hashtable();

    foreach (IndexReader reader in subReaders)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry.
        foreach (System.Collections.DictionaryEntry entry in reader.GetFieldNames(fieldNames))
        {
            System.String name = entry.Key.ToString();
            if (!unique.ContainsKey(name))
            {
                unique.Add(name, name);
            }
        }
    }

    return unique;
}
/// <summary>
/// A TermEnum that merges the term enumerations of several readers through
/// a SegmentMergeQueue, summing the document frequency of terms that
/// compare equal.
/// </summary>
class MultiTermEnum : TermEnum
{
    // Holds one SegmentMergeInfo per reader that still has terms.
    private SegmentMergeQueue queue;

    // Current term, or null once Next() has found the queue empty.
    private Term term;

    // Document frequency of the current term, summed over readers.
    private int docFreq;

    /// <summary>
    /// Opens a term enumeration on every reader and queues those that
    /// have a term to offer.
    /// </summary>
    /// <param name="readers">The readers to enumerate.</param>
    /// <param name="starts">Start value for each reader, passed to its SegmentMergeInfo.</param>
    /// <param name="t">Start term handed to each reader's Terms(Term), or
    /// null to use each reader's Terms().</param>
    public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
    {
        queue = new SegmentMergeQueue(readers.Length);
        bool seeking = (t != null);

        for (int seg = 0; seg < readers.Length; seg++)
        {
            IndexReader reader = readers[seg];
            TermEnum termEnum = seeking ? reader.Terms(t) : reader.Terms();
            SegmentMergeInfo smi = new SegmentMergeInfo(starts[seg], termEnum, reader);

            // Without a start term the enum is advanced once; with a start
            // term it is only checked for a current term.
            bool hasTerm = seeking ? (termEnum.Term() != null) : smi.Next();

            if (hasTerm)
            {
                queue.Put(smi);            // initialize queue
            }
            else
            {
                smi.Close();
            }
        }

        if (seeking && queue.Size() > 0)
        {
            Next();
        }
    }

    /// <summary>
    /// Advances to the term at the top of the queue, consuming every queue
    /// entry that compares equal to it and summing their frequencies.
    /// </summary>
    /// <returns>false (with the current term set to null) when the queue is
    /// empty; true otherwise.</returns>
    public override bool Next()
    {
        SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
        if (top == null)
        {
            term = null;
            return false;
        }

        term = top.term;
        docFreq = 0;

        for (; top != null && term.CompareTo(top.term) == 0; top = (SegmentMergeInfo) queue.Top())
        {
            queue.Pop();
            docFreq += top.termEnum.DocFreq();   // increment freq

            if (top.Next())
            {
                queue.Put(top);                  // restore queue
            }
            else
            {
                top.Close();                     // done with a segment
            }
        }

        return true;
    }

    /// <summary>Returns the current term, which may be null.</summary>
    public override Term Term()
    {
        return this.term;
    }

    /// <summary>Returns the document frequency summed for the current term.</summary>
    public override int DocFreq()
    {
        return this.docFreq;
    }

    /// <summary>Closes the underlying merge queue.</summary>
    public override void Close()
    {
        queue.Close();
    }
}
/// <summary>
/// A TermDocs that walks the readers one after another, offsetting each
/// reader's document numbers by that reader's start value.
/// </summary>
class MultiTermDocs : TermDocs
{
    protected internal IndexReader[] readers;
    protected internal int[] starts;

    // Term set by Seek; null until Seek has been called with a term.
    protected internal Term term;

    // Start value of the reader that "current" belongs to.
    protected internal int base_Renamed = 0;

    // Index of the next reader to open.
    protected internal int pointer = 0;

    // One lazily created TermDocs per reader, reused across seeks.
    private TermDocs[] readerTermDocs;

    // TermDocs of the reader currently being walked, or null.
    protected internal TermDocs current;

    /// <summary>Stores the readers and their start values.</summary>
    /// <param name="r">The readers to walk, in order.</param>
    /// <param name="s">Start value for each reader.</param>
    public MultiTermDocs(IndexReader[] r, int[] s)
    {
        readers = r;
        starts = s;
        readerTermDocs = new TermDocs[r.Length];
    }

    /// <summary>Returns the current document number, offset by the current reader's start.</summary>
    public virtual int Doc()
    {
        int localDoc = current.Doc();
        return base_Renamed + localDoc;
    }

    /// <summary>Returns the frequency reported by the current reader's TermDocs.</summary>
    public virtual int Freq()
    {
        return current.Freq();
    }

    /// <summary>Records the term and resets iteration to before the first reader.</summary>
    public virtual void Seek(Term term)
    {
        this.term = term;
        base_Renamed = 0;
        pointer = 0;
        current = null;
    }

    /// <summary>Seeks to the term the given enumeration currently points at.</summary>
    public virtual void Seek(TermEnum termEnum)
    {
        Term target = termEnum.Term();
        Seek(target);
    }

    /// <summary>
    /// Advances to the next document, moving on to the next reader whenever
    /// the current one is exhausted.
    /// </summary>
    /// <returns>false once every reader has been exhausted.</returns>
    public virtual bool Next()
    {
        if (current != null && current.Next())
        {
            return true;
        }

        if (pointer >= readers.Length)
        {
            return false;
        }

        // Open the next reader and retry.
        base_Renamed = starts[pointer];
        current = TermDocs(pointer++);
        return Next();
    }

    /// <summary>Optimized implementation. </summary>
    public virtual int Read(int[] docs, int[] freqs)
    {
        while (true)
        {
            while (current == null)
            {
                if (pointer >= readers.Length)
                {
                    return 0;              // no readers left
                }

                // try next segment
                base_Renamed = starts[pointer];
                current = TermDocs(pointer++);
            }

            int count = current.Read(docs, freqs);
            if (count != 0)
            {
                // got some: adjust doc numbers
                int shift = base_Renamed;
                for (int k = 0; k < count; k++)
                {
                    docs[k] += shift;
                }
                return count;
            }

            // none left in segment
            current = null;
        }
    }

    /// <summary>
    /// Skips within the current reader to <c>target</c> (expressed relative
    /// to that reader's start), moving on to the next reader on failure.
    /// </summary>
    /// <returns>false once every reader has been exhausted.</returns>
    public virtual bool SkipTo(int target)
    {
        if (current != null && current.SkipTo(target - base_Renamed))
        {
            return true;
        }

        if (pointer >= readers.Length)
        {
            return false;
        }

        // Open the next reader and retry.
        base_Renamed = starts[pointer];
        current = TermDocs(pointer++);
        return SkipTo(target);
    }

    /// <summary>
    /// Returns the TermDocs for reader <c>i</c>, creating it on first use,
    /// after seeking it to the current term. Returns null while no term is set.
    /// </summary>
    private TermDocs TermDocs(int i)
    {
        if (term == null)
        {
            return null;
        }

        TermDocs docs = readerTermDocs[i];
        if (docs == null)
        {
            docs = TermDocs(readers[i]);
            readerTermDocs[i] = docs;
        }
        docs.Seek(term);
        return docs;
    }

    /// <summary>Creates the per-reader enumerator; subclasses may override.</summary>
    protected internal virtual TermDocs TermDocs(IndexReader reader)
    {
        return reader.TermDocs();
    }

    /// <summary>Closes every per-reader TermDocs that has been created.</summary>
    public virtual void Close()
    {
        foreach (TermDocs docs in readerTermDocs)
        {
            if (docs != null)
            {
                docs.Close();
            }
        }
    }
}
/// <summary>
/// A MultiTermDocs whose per-reader enumerators are TermPositions, so that
/// positions can be read from the current reader.
/// </summary>
class MultiTermPositions : MultiTermDocs, TermPositions
{
    /// <summary>Passes the readers and their start values to MultiTermDocs.</summary>
    public MultiTermPositions(IndexReader[] r, int[] s)
        : base(r, s)
    {
    }

    /// <summary>Creates the per-reader enumerator from the reader's TermPositions().</summary>
    protected internal override TermDocs TermDocs(IndexReader reader)
    {
        TermPositions positions = reader.TermPositions();
        return (TermDocs) positions;
    }

    /// <summary>Returns the next position from the current reader's TermPositions.</summary>
    public virtual int NextPosition()
    {
        TermPositions positions = (TermPositions) current;
        return positions.NextPosition();
    }
}