Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / beagled / Lucene.Net / Index / SegmentsReader.cs
blobb5b343027bb8e46a4648ead11b484395dbf3bbf3
1 using System;
2 using System.Collections;
3 using System.Runtime.CompilerServices;
5 using Lucene.Net.Documents;
6 using Lucene.Net.Store;
8 namespace Lucene.Net.Index
10 /* ====================================================================
11 * The Apache Software License, Version 1.1
13 * Copyright (c) 2001 The Apache Software Foundation. All rights
14 * reserved.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in
25 * the documentation and/or other materials provided with the
26 * distribution.
28 * 3. The end-user documentation included with the redistribution,
29 * if any, must include the following acknowledgment:
30 * "This product includes software developed by the
31 * Apache Software Foundation (http://www.apache.org/)."
32 * Alternately, this acknowledgment may appear in the software itself,
33 * if and wherever such third-party acknowledgments normally appear.
35 * 4. The names "Apache" and "Apache Software Foundation" and
36 * "Apache Lucene" must not be used to endorse or promote products
37 * derived from this software without prior written permission. For
38 * written permission, please contact apache@apache.org.
40 * 5. Products derived from this software may not be called "Apache",
41 * "Apache Lucene", nor may "Apache" appear in their name, without
42 * prior written permission of the Apache Software Foundation.
44 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
45 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
47 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
48 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
49 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
50 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
51 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
52 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
53 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
54 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 * ====================================================================
58 * This software consists of voluntary contributions made by many
59 * individuals on behalf of the Apache Software Foundation. For more
60 * information on the Apache Software Foundation, please see
61 * <http://www.apache.org/>.
/// <summary>
/// An IndexReader that presents an array of SegmentReaders as one index.
/// Document numbers seen by callers are global: the documents of segment
/// <c>i</c> occupy the range <c>starts[i] .. starts[i + 1] - 1</c>, and every
/// per-document call is dispatched to the owning segment with the
/// segment-local number <c>n - starts[i]</c>.
/// </summary>
sealed class SegmentsReader : IndexReader
{
    private SegmentReader[] readers;
    private int[] starts;                           // 1st docno for each segment; last slot holds maxDoc
    private Hashtable normsCache = new Hashtable(); // field name -> byte[] of norms covering all segments
    private int maxDoc = 0;
    private int numDocs = -1;                       // cached document count; -1 means "recompute"
    private bool hasDeletions = false;

    /// <summary>
    /// Builds the reader over the given segments and computes, for each
    /// segment, the global number of its first document.
    /// </summary>
    /// <param name="directory">Directory passed on to the base IndexReader.</param>
    /// <param name="r">Segment readers, in document-number order.</param>
    internal SegmentsReader(Directory directory, SegmentReader[] r)
        : base(directory)
    {
        readers = r;
        starts = new int[readers.Length + 1];       // build starts array
        for (int i = 0; i < readers.Length; i++)
        {
            starts[i] = maxDoc;
            maxDoc += readers[i].MaxDoc();          // compute maxDocs

            if (readers[i].HasDeletions())
                hasDeletions = true;
        }
        starts[readers.Length] = maxDoc;
    }

    /// <summary>
    /// Returns the sum of NumDocs() over all segments. The sum is cached
    /// until a delete or undelete invalidates it.
    /// </summary>
    [MethodImpl(MethodImplOptions.Synchronized)]
    public override int NumDocs()
    {
        if (numDocs == -1)
        {                                           // check cache
            int n = 0;                              // cache miss--recompute
            for (int i = 0; i < readers.Length; i++)
                n += readers[i].NumDocs();          // sum from readers
            numDocs = n;
        }
        return numDocs;
    }

    /// <summary>
    /// Returns the sum of MaxDoc() over all segments, as computed in the
    /// constructor.
    /// </summary>
    public override int MaxDoc()
    {
        return maxDoc;
    }

    /// <summary>
    /// Returns document <paramref name="n"/> (global number) from the
    /// segment that contains it.
    /// </summary>
    public override Document Document(int n)
    {
        int i = ReaderIndex(n);                     // find segment num
        return readers[i].Document(n - starts[i]);  // dispatch to segment reader
    }

    /// <summary>
    /// Asks the owning segment whether document <paramref name="n"/>
    /// (global number) is deleted.
    /// </summary>
    public override bool IsDeleted(int n)
    {
        int i = ReaderIndex(n);                     // find segment num
        return readers[i].IsDeleted(n - starts[i]); // dispatch to segment reader
    }

    /// <summary>
    /// Returns the deletion flag tracked by this reader: set when a segment
    /// reported deletions at construction or after DoDelete, cleared by
    /// UndeleteAll.
    /// </summary>
    public override bool HasDeletions()
    {
        return hasDeletions;
    }

    /// <summary>
    /// Deletes document <paramref name="n"/> (global number) in its segment
    /// and invalidates the cached document count.
    /// </summary>
    [MethodImpl(MethodImplOptions.Synchronized)]
    protected internal override void DoDelete(int n)
    {
        numDocs = -1;                               // invalidate cache
        int i = ReaderIndex(n);                     // find segment num
        readers[i].DoDelete(n - starts[i]);         // dispatch to segment reader
        hasDeletions = true;
    }

    /// <summary>
    /// Undeletes all documents in every segment. The cached document count
    /// and the deletion flag are reset as well, so that NumDocs() and
    /// HasDeletions() do not keep reporting the state from before the
    /// undelete.
    /// </summary>
    [MethodImpl(MethodImplOptions.Synchronized)]
    public override void UndeleteAll()
    {
        // Invalidate first (as DoDelete does) so that a failure part-way
        // through the loop cannot leave a stale count behind.
        numDocs = -1;
        for (int i = 0; i < readers.Length; i++)
            readers[i].UndeleteAll();
        hasDeletions = false;
    }

    /// <summary>
    /// Binary-searches the starts array for the index of the segment that
    /// holds global document number <paramref name="n"/>.
    /// </summary>
    private int ReaderIndex(int n)
    {                                               // find reader for doc n:
        int lo = 0;                                 // search starts array
        int hi = readers.Length - 1;                // for first element less

        while (hi >= lo)
        {
            int mid = (lo + hi) >> 1;
            int midValue = starts[mid];
            if (n < midValue)
                hi = mid - 1;
            else if (n > midValue)
                lo = mid + 1;
            else
            {                                       // found a match
                // Consecutive equal starts mean the earlier segments are
                // empty; the document lives in the last one of the run.
                while (mid + 1 < readers.Length && starts[mid + 1] == midValue)
                {
                    mid++;                          // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }

    /// <summary>
    /// Returns the norms of <paramref name="field"/> for all documents, one
    /// byte per global document number. The array is assembled from the
    /// segments on first request and cached per field afterwards.
    /// </summary>
    [MethodImpl(MethodImplOptions.Synchronized)]
    public override byte[] Norms(String field)
    {
        byte[] bytes = (byte[])normsCache[field];
        if (bytes != null)
            return bytes;                           // cache hit

        bytes = new byte[MaxDoc()];
        for (int i = 0; i < readers.Length; i++)
            readers[i].Norms(field, bytes, starts[i]);
        normsCache.Add(field, bytes);               // update cache
        return bytes;
    }

    /// <summary>
    /// Returns an enumeration over the terms of all segments.
    /// </summary>
    public override TermEnum Terms()
    {
        return new SegmentsTermEnum(readers, starts, null);
    }

    /// <summary>
    /// Returns an enumeration over the terms of all segments, with each
    /// segment's enumeration opened at <paramref name="term"/>.
    /// </summary>
    public override TermEnum Terms(Term term)
    {
        return new SegmentsTermEnum(readers, starts, term);
    }

    /// <summary>
    /// Returns the sum of DocFreq(t) over all segments.
    /// </summary>
    public override int DocFreq(Term t)
    {
        int total = 0;                              // sum freqs in segments
        for (int i = 0; i < readers.Length; i++)
            total += readers[i].DocFreq(t);
        return total;
    }

    /// <summary>
    /// Returns an unpositioned TermDocs spanning all segments.
    /// </summary>
    public override TermDocs TermDocs()
    {
        return new SegmentsTermDocs(readers, starts);
    }

    /// <summary>
    /// Returns an unpositioned TermPositions spanning all segments.
    /// </summary>
    public override TermPositions TermPositions()
    {
        return new SegmentsTermPositions(readers, starts);
    }

    /// <summary>
    /// Closes every segment reader.
    /// </summary>
    [MethodImpl(MethodImplOptions.Synchronized)]
    protected internal override void DoClose()
    {
        for (int i = 0; i < readers.Length; i++)
            readers[i].Close();
    }

    /// <summary>
    /// Returns the distinct field names found across all segments.
    /// </summary>
    public override ICollection GetFieldNames()
    {
        // maintain a unique set of field names
        Hashtable fieldSet = new Hashtable();
        for (int i = 0; i < readers.Length; i++)
        {
            SegmentReader reader = readers[i];
            ICollection names = reader.GetFieldNames();
            // iterate through the field names and add them to the set
            foreach (string de in names)
            {
                fieldSet[de] = "";
            }
        }
        return fieldSet.Keys;
    }

    /// <summary>
    /// Returns the distinct field names that the segments report for
    /// GetFieldNames(<paramref name="indexed"/>).
    /// </summary>
    public override ICollection GetFieldNames(bool indexed)
    {
        // maintain a unique set of field names
        Hashtable fieldSet = new Hashtable();
        for (int i = 0; i < readers.Length; i++)
        {
            SegmentReader reader = readers[i];
            ICollection names = reader.GetFieldNames(indexed);
            foreach (string de in names)
            {
                fieldSet[de] = "";
            }
        }
        return fieldSet.Keys;
    }
}
/// <summary>
/// Term enumeration over several segments. One SegmentMergeInfo per segment
/// is kept in a SegmentMergeQueue; each call to Next() takes the term at the
/// top of the queue and adds up its document frequency over every segment
/// positioned on that same term.
/// </summary>
class SegmentsTermEnum : TermEnum
{
    private SegmentMergeQueue queue;

    private Term term;
    private int docFreq;

    /// <summary>
    /// Opens a term enumeration on every segment and queues those that have
    /// a term available.
    /// </summary>
    /// <param name="readers">Segments to enumerate.</param>
    /// <param name="starts">First global document number of each segment.</param>
    /// <param name="t">Term to open each segment at, or null to enumerate from the beginning.</param>
    internal SegmentsTermEnum(SegmentReader[] readers, int[] starts, Term t)
    {
        bool seeking = (t != null);

        queue = new SegmentMergeQueue(readers.Length);
        for (int seg = 0; seg < readers.Length; seg++)
        {
            SegmentReader segment = readers[seg];
            SegmentTermEnum segmentTerms = seeking
                ? (SegmentTermEnum)segment.Terms(t)
                : (SegmentTermEnum)segment.Terms();

            SegmentMergeInfo info = new SegmentMergeInfo(starts[seg], segmentTerms, segment);

            // When seeking, the segment is usable if its enumeration reports
            // a term; otherwise it must first be advanced once.
            bool usable;
            if (seeking)
                usable = segmentTerms.Term() != null;
            else
                usable = info.Next();

            if (usable)
                queue.Put(info);                    // initialize queue
            else
                info.Close();
        }

        if (seeking && queue.Size() > 0)
        {
            Next();
        }
    }

    /// <summary>
    /// Advances to the term at the top of the queue and sums its document
    /// frequency over all segments currently on that term.
    /// </summary>
    /// <returns>false, with the current term set to null, once the queue is empty.</returns>
    public override bool Next()
    {
        SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
        if (head == null)
        {
            term = null;
            return false;
        }

        term = head.term;
        docFreq = 0;

        // Consume every queued segment that is positioned on the same term.
        for (; head != null && term.CompareTo(head.term) == 0;
               head = (SegmentMergeInfo)queue.Top())
        {
            queue.Pop();
            docFreq += head.termEnum.DocFreq();     // increment freq
            if (head.Next())
                queue.Put(head);                    // restore queue
            else
                head.Close();                       // done with a segment
        }
        return true;
    }

    /// <summary>
    /// Returns the term selected by the last call to Next(), or null.
    /// </summary>
    public override Term Term()
    {
        return term;
    }

    /// <summary>
    /// Returns the document frequency summed by the last call to Next().
    /// </summary>
    public override int DocFreq()
    {
        return docFreq;
    }

    /// <summary>
    /// Closes the underlying merge queue.
    /// </summary>
    public override void Close()
    {
        queue.Close();
    }
}
/// <summary>
/// TermDocs over several segments. Segments are visited in order; the
/// document numbers reported by a segment are shifted by that segment's
/// start offset so that callers see global document numbers.
/// </summary>
class SegmentsTermDocs : TermDocs
{
    protected SegmentReader[] readers;
    protected int[] starts;
    protected Term term;

    protected int _base = 0;                        // start offset of the current segment
    protected int pointer = 0;                      // index of the next segment to open

    private SegmentTermDocs[] segTermDocs;          // per-segment enumerators, created on demand
    protected SegmentTermDocs current;              // enumerator of the segment being read, or null

    /// <summary>
    /// Creates an unpositioned enumerator over the given segments.
    /// </summary>
    /// <param name="r">Segments to read from.</param>
    /// <param name="s">First global document number of each segment.</param>
    internal SegmentsTermDocs(SegmentReader[] r, int[] s)
    {
        readers = r;
        starts = s;

        segTermDocs = new SegmentTermDocs[r.Length];
    }

    /// <summary>
    /// Returns the current document number, offset by the current segment's start.
    /// </summary>
    public int Doc()
    {
        return _base + current.doc;
    }

    /// <summary>
    /// Returns the frequency reported by the current segment's enumerator.
    /// </summary>
    public int Freq()
    {
        return current.freq;
    }

    /// <summary>
    /// Records the term to enumerate and rewinds to before the first segment.
    /// </summary>
    public void Seek(Term term)
    {
        this.term = term;
        this._base = 0;
        this.pointer = 0;
        this.current = null;
    }

    /// <summary>
    /// Seeks to the term the given enumeration is currently positioned on.
    /// </summary>
    public void Seek(TermEnum termEnum)
    {
        Seek(termEnum.Term());
    }

    /// <summary>
    /// Moves to the next document, opening following segments as the
    /// current one runs out.
    /// </summary>
    /// <returns>false once all segments are exhausted.</returns>
    public bool Next()
    {
        for (;;)
        {
            if (current != null && current.Next())
            {
                return true;
            }

            if (pointer >= readers.Length)
            {
                return false;
            }

            // Current segment is exhausted (or none opened yet): open the next one.
            _base = starts[pointer];
            current = TermDocs(pointer++);
        }
    }

    /// <summary>
    /// Optimized implementation. Fills the arrays from the current segment,
    /// moving on to later segments while the current one yields nothing,
    /// and shifts the returned document numbers to global numbers.
    /// </summary>
    /// <param name="docs">Receives document numbers.</param>
    /// <param name="freqs">Receives the matching frequencies.</param>
    /// <returns>Number of entries read, or 0 when no segment has any left.</returns>
    public int Read(int[] docs, int[] freqs)
    {
        for (;;)
        {
            while (current == null)
            {
                if (pointer >= readers.Length)
                {
                    return 0;
                }
                _base = starts[pointer];            // try next segment
                current = TermDocs(pointer++);
            }

            int count = current.Read(docs, freqs);
            if (count == 0)
            {                                       // none left in segment
                current = null;
                continue;
            }

            int offset = _base;                     // adjust doc numbers
            for (int k = 0; k < count; k++)
            {
                docs[k] += offset;
            }
            return count;                           // got some
        }
    }

    /// <summary>
    /// As yet unoptimized implementation: steps with Next() until the
    /// current document number is no longer below the target.
    /// </summary>
    /// <param name="target">Document number to reach.</param>
    /// <returns>false if the documents run out first.</returns>
    public bool SkipTo(int target)
    {
        while (Next())
        {
            if (target <= Doc())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Returns the enumerator for segment <paramref name="i"/>, creating it
    /// on first use, positioned on the current term. Returns null when no
    /// term has been set.
    /// </summary>
    private SegmentTermDocs TermDocs(int i)
    {
        if (term == null)
        {
            return null;
        }

        SegmentTermDocs segmentDocs = segTermDocs[i];
        if (segmentDocs == null)
        {
            segmentDocs = TermDocs(readers[i]);
            segTermDocs[i] = segmentDocs;
        }
        segmentDocs.Seek(term);
        return segmentDocs;
    }

    /// <summary>
    /// Creates the per-segment enumerator; subclasses override this to
    /// supply a different kind.
    /// </summary>
    protected virtual SegmentTermDocs TermDocs(SegmentReader reader)
    {
        return (SegmentTermDocs)reader.TermDocs();
    }

    /// <summary>
    /// Closes every per-segment enumerator that was created.
    /// </summary>
    public void Close()
    {
        for (int seg = 0; seg < segTermDocs.Length; seg++)
        {
            SegmentTermDocs opened = segTermDocs[seg];
            if (opened != null)
            {
                opened.Close();
            }
        }
    }
}
/// <summary>
/// TermPositions over several segments. Reuses the segment-walking logic of
/// SegmentsTermDocs and only changes which per-segment enumerator is
/// created.
/// </summary>
class SegmentsTermPositions : SegmentsTermDocs, TermPositions
{
    /// <summary>
    /// Creates an unpositioned enumerator over the given segments.
    /// </summary>
    /// <param name="r">Segments to read from.</param>
    /// <param name="s">First global document number of each segment.</param>
    internal SegmentsTermPositions(SegmentReader[] r, int[] s)
        : base(r, s)
    {
    }

    /// <summary>
    /// Supplies the segment's TermPositions enumerator instead of its
    /// plain TermDocs one.
    /// </summary>
    protected override SegmentTermDocs TermDocs(SegmentReader reader)
    {
        TermPositions positions = reader.TermPositions();
        return (SegmentTermDocs)positions;
    }

    /// <summary>
    /// Returns the next position from the current segment's enumerator.
    /// </summary>
    public int NextPosition()
    {
        SegmentTermPositions segmentPositions = (SegmentTermPositions)current;
        return segmentPositions.NextPosition();
    }
}