Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Index / MultiReader.cs
blob337c12520458770715a2f1f1bd40f7cff471249a
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Directory = Lucene.Net.Store.Directory;
19 namespace Lucene.Net.Index
/// <summary>An IndexReader which reads multiple indexes, appending their content.
/// Document numbers are laid out back to back: sub-reader <c>i</c> owns the
/// document numbers from <c>starts[i]</c> up to (but excluding) <c>starts[i + 1]</c>.
/// </summary>
/// <version> $Id: MultiReader.cs,v 1.1 2005/01/17 19:54:29 joeshaw Exp $
/// </version>
public class MultiReader : IndexReader
{
    private IndexReader[] subReaders;
    private int[] starts; // 1st docno for each segment; last slot holds maxDoc
    private System.Collections.Hashtable normsCache = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
    private int maxDoc = 0;
    private int numDocs = -1; // -1 means "not computed yet / invalidated"
    private bool hasDeletions = false;

    /// <summary> <p>Construct a MultiReader aggregating the named set of (sub)readers.
    /// Directory locking for delete, undeleteAll, and setNorm operations is
    /// left to the subreaders. </p>
    /// <p>Note that all subreaders are closed if this Multireader is closed.</p>
    /// </summary>
    /// <param name="subReaders">set of (sub)readers; the directory of the first one
    /// (or null when the array is empty) is handed to the base class
    /// </param>
    /// <throws> IOException </throws>
    public MultiReader(IndexReader[] subReaders):base(subReaders.Length == 0?null:subReaders[0].Directory())
    {
        Initialize(subReaders);
    }

    /// <summary>Construct reading the named set of readers. </summary>
    public /*internal*/ MultiReader(Directory directory, SegmentInfos sis, bool closeDirectory, IndexReader[] subReaders):base(directory, sis, closeDirectory)
    {
        Initialize(subReaders);
    }

    /// <summary>Stores the sub-readers and builds the <c>starts</c> table,
    /// accumulating <c>maxDoc</c> and the initial <c>hasDeletions</c> flag.
    /// </summary>
    private void Initialize(IndexReader[] subReaders)
    {
        this.subReaders = subReaders;
        starts = new int[subReaders.Length + 1]; // build starts array
        for (int i = 0; i < subReaders.Length; i++)
        {
            starts[i] = maxDoc;
            maxDoc += subReaders[i].MaxDoc(); // compute maxDocs

            if (subReaders[i].HasDeletions())
                hasDeletions = true;
        }
        starts[subReaders.Length] = maxDoc; // sentinel: one past the last document
    }

    /// <summary>Return an array of term frequency vectors for the specified document.
    /// The array contains a vector for each vectorized Field in the document.
    /// Each vector contains term numbers and frequencies for all terms
    /// in a given vectorized Field.
    /// If no such fields existed, the method returns null.
    /// </summary>
    public override TermFreqVector[] GetTermFreqVectors(int n)
    {
        int i = ReaderIndex(n); // find segment num
        return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment
    }

    /// <summary>Returns the term frequency vector of one field of document
    /// <paramref name="n"/>, as reported by the sub-reader that owns the document.
    /// </summary>
    public override TermFreqVector GetTermFreqVector(int n, System.String field)
    {
        int i = ReaderIndex(n); // find segment num
        return subReaders[i].GetTermFreqVector(n - starts[i], field);
    }

    /// <summary>Returns the sum of <c>NumDocs()</c> over all sub-readers.
    /// The sum is cached until a delete or undelete invalidates it.
    /// </summary>
    public override int NumDocs()
    {
        lock (this)
        {
            if (numDocs == - 1)
            {
                // cache miss--recompute
                int n = 0;
                for (int i = 0; i < subReaders.Length; i++)
                    n += subReaders[i].NumDocs(); // sum from readers
                numDocs = n;
            }
            return numDocs;
        }
    }

    /// <summary>Returns the total of the sub-readers' <c>MaxDoc()</c> values,
    /// computed once at construction.
    /// </summary>
    public override int MaxDoc()
    {
        return maxDoc;
    }

    /// <summary>Returns document <paramref name="n"/> from the sub-reader that owns it.</summary>
    public override Document Document(int n)
    {
        int i = ReaderIndex(n); // find segment num
        return subReaders[i].Document(n - starts[i]); // dispatch to segment reader
    }

    /// <summary>Asks the owning sub-reader whether document <paramref name="n"/> is deleted.</summary>
    public override bool IsDeleted(int n)
    {
        int i = ReaderIndex(n); // find segment num
        return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader
    }

    /// <summary>Returns the deletion flag tracked by this reader: initialised from the
    /// sub-readers, set by <c>DoDelete</c> and cleared by <c>DoUndeleteAll</c>.
    /// </summary>
    public override bool HasDeletions()
    {
        return hasDeletions;
    }

    /// <summary>Deletes document <paramref name="n"/> through the owning sub-reader.</summary>
    protected internal override void DoDelete(int n)
    {
        numDocs = - 1; // invalidate cache
        int i = ReaderIndex(n); // find segment num
        subReaders[i].Delete(n - starts[i]); // dispatch to segment reader
        hasDeletions = true;
    }

    /// <summary>Undeletes all documents in every sub-reader.</summary>
    protected internal override void DoUndeleteAll()
    {
        for (int i = 0; i < subReaders.Length; i++)
            subReaders[i].UndeleteAll();
        hasDeletions = false;
        // The cached live-document count was computed with deletions applied;
        // drop it so the next NumDocs() call re-sums the sub-readers.
        numDocs = - 1;
    }

    /// <summary>Binary-searches <c>starts</c> for the sub-reader owning document
    /// <paramref name="n"/>. On an exact hit it moves forward over following
    /// sub-readers that share the same start (i.e. empty ones) and returns the last.
    /// </summary>
    /// <returns>index into <c>subReaders</c>; the search's <c>hi</c> bound when
    /// <paramref name="n"/> is not itself a start value
    /// </returns>
    private int ReaderIndex(int n)
    {
        // find reader for doc n:
        int lo = 0; // search starts array
        int hi = subReaders.Length - 1; // for first element less

        while (hi >= lo)
        {
            int mid = (lo + hi) >> 1;
            int midValue = starts[mid];
            if (n < midValue)
                hi = mid - 1;
            else if (n > midValue)
                lo = mid + 1;
            else
            {
                // found a match
                while (mid + 1 < subReaders.Length && starts[mid + 1] == midValue)
                {
                    mid++; // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }

    /// <summary>Returns the norms of <paramref name="field"/> for all documents,
    /// assembled from the sub-readers into one array of length <c>MaxDoc()</c>.
    /// The array is cached per field and the cached instance is what gets returned.
    /// </summary>
    public override byte[] Norms(System.String field)
    {
        lock (this)
        {
            byte[] bytes = (byte[]) normsCache[field];
            if (bytes != null)
                return bytes; // cache hit

            bytes = new byte[MaxDoc()];
            for (int i = 0; i < subReaders.Length; i++)
                subReaders[i].Norms(field, bytes, starts[i]);
            normsCache[field] = bytes; // update cache
            return bytes;
        }
    }

    /// <summary>Writes the norms of <paramref name="field"/> into
    /// <paramref name="result"/> starting at <paramref name="offset"/>.
    /// </summary>
    public override void Norms(System.String field, byte[] result, int offset)
    {
        lock (this)
        {
            byte[] bytes = (byte[]) normsCache[field];
            if (bytes != null)
            {
                // cache hit: the cached array was filled from these same sub-readers,
                // so reading every segment again would only repeat the work
                Array.Copy(bytes, 0, result, offset, MaxDoc());
                return;
            }

            for (int i = 0; i < subReaders.Length; i++)
            {
                // read from segments
                subReaders[i].Norms(field, result, offset + starts[i]);
            }
        }
    }

    /// <summary>Sets the norm of one document through the owning sub-reader and
    /// drops the cached norms array of that field.
    /// </summary>
    protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
    {
        normsCache.Remove(field); // clear cache
        int i = ReaderIndex(n); // find segment num
        subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch
    }

    /// <summary>Returns an enumeration over the terms of all sub-readers.</summary>
    public override TermEnum Terms()
    {
        return new MultiTermEnum(subReaders, starts, null);
    }

    /// <summary>Returns an enumeration over the terms of all sub-readers,
    /// with each sub-reader's enumeration opened at <paramref name="term"/>.
    /// </summary>
    public override TermEnum Terms(Term term)
    {
        return new MultiTermEnum(subReaders, starts, term);
    }

    /// <summary>Returns the sum of the sub-readers' document frequencies for <paramref name="t"/>.</summary>
    public override int DocFreq(Term t)
    {
        int total = 0; // sum freqs in segments
        for (int i = 0; i < subReaders.Length; i++)
            total += subReaders[i].DocFreq(t);
        return total;
    }

    /// <summary>Returns an unpositioned TermDocs spanning all sub-readers.</summary>
    public override TermDocs TermDocs()
    {
        return new MultiTermDocs(subReaders, starts);
    }

    /// <summary>Returns an unpositioned TermPositions spanning all sub-readers.</summary>
    public override TermPositions TermPositions()
    {
        return new MultiTermPositions(subReaders, starts);
    }

    /// <summary>Commits every sub-reader.</summary>
    protected internal override void DoCommit()
    {
        for (int i = 0; i < subReaders.Length; i++)
            subReaders[i].Commit();
    }

    /// <summary>Closes every sub-reader.</summary>
    protected internal override void DoClose()
    {
        lock (this)
        {
            for (int i = 0; i < subReaders.Length; i++)
                subReaders[i].Close();
        }
    }

    /// <summary>Returns the union of the field names of all sub-readers as a
    /// Hashtable mapping each name to itself.
    /// </summary>
    /// <seealso cref="IndexReader.GetFieldNames()">
    /// </seealso>
    public override System.Collections.ICollection GetFieldNames()
    {
        // maintain a unique set of Field names
        System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
        for (int i = 0; i < subReaders.Length; i++)
        {
            IndexReader reader = subReaders[i];
            System.Collections.ICollection names = reader.GetFieldNames();
            // iterate through the Field names and add them to the set
            for (System.Collections.IEnumerator iterator = names.GetEnumerator(); iterator.MoveNext(); )
            {
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) iterator.Current;
                System.String s = fi.Key.ToString();
                if (fieldSet.ContainsKey(s) == false)
                {
                    fieldSet.Add(s, s);
                }
            }
        }
        return fieldSet;
    }

    /// <summary>Returns the union of the sub-readers' <c>GetFieldNames(indexed)</c>
    /// results as a Hashtable mapping each name to itself.
    /// </summary>
    /// <seealso cref="IndexReader.GetFieldNames(bool)">
    /// </seealso>
    public override System.Collections.ICollection GetFieldNames(bool indexed)
    {
        // maintain a unique set of Field names
        System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
        for (int i = 0; i < subReaders.Length; i++)
        {
            IndexReader reader = subReaders[i];
            System.Collections.ICollection names = reader.GetFieldNames(indexed);
            for (System.Collections.IEnumerator iterator = names.GetEnumerator(); iterator.MoveNext(); )
            {
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) iterator.Current;
                System.String s = fi.Key.ToString();
                if (fieldSet.ContainsKey(s) == false)
                {
                    fieldSet.Add(s, s);
                }
            }
        }
        return fieldSet;
    }

    /// <summary>Returns the union of the sub-readers'
    /// <c>GetIndexedFieldNames(storedTermVector)</c> results as a Hashtable
    /// mapping each name to itself.
    /// </summary>
    public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
    {
        // maintain a unique set of Field names
        System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
        for (int i = 0; i < subReaders.Length; i++)
        {
            IndexReader reader = subReaders[i];
            System.Collections.ICollection names = reader.GetIndexedFieldNames(storedTermVector);
            foreach (object item in names)
            {
                // A Hashtable-backed collection (which is what this class itself
                // returns) enumerates as DictionaryEntry; unwrap it to the name.
                object name = item;
                if (item is System.Collections.DictionaryEntry)
                    name = ((System.Collections.DictionaryEntry) item).Key;

                // Hashtable.Add throws on a duplicate key, and the same field
                // commonly exists in several sub-readers.
                if (fieldSet.ContainsKey(name) == false)
                {
                    fieldSet.Add(name, name);
                }
            }
        }
        return fieldSet;
    }
}
/// <summary>TermEnum that merges the term enumerations of several readers,
/// using a SegmentMergeQueue to pick the next term and summing the document
/// frequencies of entries that compare equal.
/// </summary>
class MultiTermEnum : TermEnum
{
    private SegmentMergeQueue queue;

    private Term term;
    private int docFreq;

    /// <summary>Opens one enumeration per reader and queues every one that has a term.</summary>
    /// <param name="readers">readers to merge</param>
    /// <param name="starts">first document number of each reader</param>
    /// <param name="t">term to open the enumerations at, or null to enumerate all terms</param>
    public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
    {
        bool seeking = t != null;
        queue = new SegmentMergeQueue(readers.Length);

        for (int idx = 0; idx < readers.Length; idx++)
        {
            IndexReader segment = readers[idx];
            TermEnum segmentTerms = seeking ? segment.Terms(t) : segment.Terms();
            SegmentMergeInfo info = new SegmentMergeInfo(starts[idx], segmentTerms, segment);

            // An enumeration opened at a term is inspected as-is;
            // an unseeked one has to be advanced first.
            bool hasTerm;
            if (seeking)
                hasTerm = segmentTerms.Term() != null;
            else
                hasTerm = info.Next();

            if (hasTerm)
            {
                queue.Put(info); // initialize queue
            }
            else
            {
                info.Close();
            }
        }

        if (seeking && queue.Size() > 0)
        {
            Next();
        }
    }

    /// <summary>Advances to the term at the top of the queue, consuming every queued
    /// entry that compares equal to it and adding up their document frequencies.
    /// </summary>
    /// <returns>false (with the current term set to null) when the queue is empty</returns>
    public override bool Next()
    {
        SegmentMergeInfo first = (SegmentMergeInfo) queue.Top();
        if (first == null)
        {
            term = null;
            return false;
        }

        term = first.term;
        docFreq = 0;

        for (SegmentMergeInfo head = first;
             head != null && term.CompareTo(head.term) == 0;
             head = (SegmentMergeInfo) queue.Top())
        {
            queue.Pop();
            docFreq += head.termEnum.DocFreq(); // increment freq
            if (head.Next())
            {
                queue.Put(head); // restore queue
            }
            else
            {
                head.Close(); // done with a segment
            }
        }
        return true;
    }

    /// <summary>Returns the current term, or null once the enumeration is exhausted.</summary>
    public override Term Term()
    {
        return term;
    }

    /// <summary>Returns the document frequency summed for the current term.</summary>
    public override int DocFreq()
    {
        return docFreq;
    }

    /// <summary>Closes the underlying merge queue.</summary>
    public override void Close()
    {
        queue.Close();
    }
}
/// <summary>TermDocs that walks the readers one after another, shifting each
/// reader's document numbers by that reader's start offset.
/// </summary>
class MultiTermDocs : TermDocs
{
    protected internal IndexReader[] readers;
    protected internal int[] starts;
    protected internal Term term;

    protected internal int base_Renamed = 0; // start offset of the current reader
    protected internal int pointer = 0;      // index of the next reader to open

    private TermDocs[] readerTermDocs;
    protected internal TermDocs current; // == readerTermDocs[pointer]

    /// <summary>Creates an unpositioned instance over the given readers.</summary>
    /// <param name="r">readers to iterate</param>
    /// <param name="s">first document number of each reader</param>
    public MultiTermDocs(IndexReader[] r, int[] s)
    {
        readers = r;
        starts = s;

        readerTermDocs = new TermDocs[r.Length];
    }

    /// <summary>Returns the current document number, offset by the current reader's start.</summary>
    public virtual int Doc()
    {
        return base_Renamed + current.Doc();
    }

    /// <summary>Returns the frequency reported by the current reader's TermDocs.</summary>
    public virtual int Freq()
    {
        return current.Freq();
    }

    /// <summary>Remembers the term and rewinds to before the first reader.</summary>
    public virtual void Seek(Term term)
    {
        this.term = term;
        this.current = null;
        this.pointer = 0;
        this.base_Renamed = 0;
    }

    /// <summary>Seeks to the term the given enumeration is positioned on.</summary>
    public virtual void Seek(TermEnum termEnum)
    {
        Seek(termEnum.Term());
    }

    /// <summary>Moves to the next document, opening the following reader whenever
    /// the current one is exhausted.
    /// </summary>
    /// <returns>false once every reader has been consumed</returns>
    public virtual bool Next()
    {
        if (current != null && current.Next())
        {
            return true;
        }

        if (pointer >= readers.Length)
        {
            return false;
        }

        // current reader is exhausted (or none opened yet): open the next one and retry
        base_Renamed = starts[pointer];
        current = TermDocs(pointer++);
        return Next();
    }

    /// <summary>Optimized implementation. </summary>
    public virtual int Read(int[] docs, int[] freqs)
    {
        for (;;)
        {
            while (current == null)
            {
                if (pointer >= readers.Length)
                {
                    return 0;
                }

                // try next segment
                base_Renamed = starts[pointer];
                current = TermDocs(pointer++);
            }

            int count = current.Read(docs, freqs);
            if (count != 0)
            {
                // got some: adjust doc numbers
                int shift = base_Renamed;
                for (int k = 0; k < count; k++)
                {
                    docs[k] += shift;
                }
                return count;
            }

            // none left in segment
            current = null;
        }
    }

    /// <summary>As yet unoptimized implementation. </summary>
    public virtual bool SkipTo(int target)
    {
        // always advances at least once, then keeps going until Doc() reaches target
        while (Next())
        {
            if (Doc() >= target)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Returns the TermDocs of reader <paramref name="i"/>, created on first
    /// use and then reused, positioned on the current term; null when no term has
    /// been set.
    /// </summary>
    private TermDocs TermDocs(int i)
    {
        if (term == null)
        {
            return null;
        }

        TermDocs docsForReader = readerTermDocs[i];
        if (docsForReader == null)
        {
            docsForReader = TermDocs(readers[i]);
            readerTermDocs[i] = docsForReader;
        }
        docsForReader.Seek(term);
        return docsForReader;
    }

    /// <summary>Creates the per-reader enumerator; subclasses override this to
    /// supply a different kind.
    /// </summary>
    protected internal virtual TermDocs TermDocs(IndexReader reader)
    {
        return reader.TermDocs();
    }

    /// <summary>Closes every per-reader TermDocs that has been created.</summary>
    public virtual void Close()
    {
        foreach (TermDocs opened in readerTermDocs)
        {
            if (opened != null)
            {
                opened.Close();
            }
        }
    }
}
/// <summary>MultiTermDocs variant whose per-reader enumerators are TermPositions,
/// so that term positions can be read as well.
/// </summary>
class MultiTermPositions : MultiTermDocs, TermPositions
{
    /// <summary>Creates an unpositioned instance over the given readers.</summary>
    /// <param name="r">readers to iterate</param>
    /// <param name="s">first document number of each reader</param>
    public MultiTermPositions(IndexReader[] r, int[] s) : base(r, s)
    {
    }

    /// <summary>Supplies the reader's TermPositions as the per-reader enumerator.</summary>
    protected internal override TermDocs TermDocs(IndexReader reader)
    {
        TermPositions positions = reader.TermPositions();
        return (TermDocs) positions;
    }

    /// <summary>Returns the next position from the current reader's TermPositions.</summary>
    public virtual int NextPosition()
    {
        TermPositions positions = (TermPositions) current;
        return positions.NextPosition();
    }
}