Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / beagled / Lucene.Net / Index / TermInfosReader.cs
bloba004199c66e5e3c9ced8388c9cd49ce5f978245b
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Directory = Lucene.Net.Store.Directory;
18 namespace Lucene.Net.Index
/// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
/// Directory. Pairs are accessed either by Term or by ordinal position in the
/// set.
/// </summary>
26 sealed public class TermInfosReader
// Constructor arguments, kept for the lifetime of the reader.
private Directory directory;
private System.String segment;
private FieldInfos fieldInfos;

// Thread data slot in which GetEnum() caches the calling thread's enumerator.
private System.LocalDataStoreSlot enumerators = System.Threading.Thread.AllocateDataSlot();

// Enumerator over "<segment>.tis"; Terms() hands out clones of it.
private SegmentTermEnum origEnum;

// Value of origEnum.size captured in the constructor.
private long size;

// Term index, filled by EnsureIndexIsRead(). indexTerms stays null until then,
// which is how EnsureIndexIsRead() detects that the index has not been read.
private Term[] indexTerms;
private TermInfo[] indexInfos;
private long[] indexPointers;

// Enumerator over "<segment>.tii"; closed and set to null once the index has been read.
private SegmentTermEnum indexEnum;
/// <summary>Opens the ".tis" and ".tii" files of a segment.</summary>
/// <param name="dir">Directory the two files are opened from.</param>
/// <param name="seg">Segment name; ".tis" and ".tii" are appended to it.</param>
/// <param name="fis">Field infos passed to both SegmentTermEnum instances.</param>
public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
{
    directory = dir;
    segment = seg;
    fieldInfos = fis;

    bool opened = false;
    try
    {
        origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
        size = origEnum.size;

        indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
        opened = true;
    }
    finally
    {
        // When construction fails part-way the caller never gets an instance to
        // Close(), so release whichever enumerator was already opened here.
        // Close() skips enumerators that are still null.
        if (!opened)
        {
            Close();
        }
    }
}
/// <summary>Finalizer: stores null in this reader's thread data slot.</summary>
~TermInfosReader()
{
    // Carried over from the Java implementation, where it worked around
    // ThreadLocal leaks on pre-1.4.2 JVMs. Whether .NET needs it at all is an
    // open question ({{Aroush-1.9}}).
    try
    {
        System.Threading.Thread.SetData(enumerators, null);
    }
    catch (Exception e)
    {
        // Report the failure on the console rather than letting it propagate.
        System.Console.WriteLine(e.Message);
    }
}
/// <summary>Returns the skipInterval value of the ".tis" enumerator.</summary>
public int GetSkipInterval()
{
    int interval = this.origEnum.skipInterval;
    return interval;
}
/// <summary>Closes the ".tis" enumerator and then the ".tii" enumerator, skipping
/// whichever of them is null.</summary>
public /*internal*/ void Close()
{
    if (this.origEnum != null)
    {
        this.origEnum.Close();
    }

    // indexEnum is already null once EnsureIndexIsRead() has consumed it.
    if (this.indexEnum != null)
    {
        this.indexEnum.Close();
    }
}
/// <summary>Returns how many term/value pairs the set holds (the size read from
/// the ".tis" enumerator at construction time).</summary>
internal long Size()
{
    return this.size;
}
/// <summary>Returns the enumerator stored in the calling thread's data slot,
/// creating it via Terms() and storing it on first use.</summary>
private SegmentTermEnum GetEnum()
{
    SegmentTermEnum cached = (SegmentTermEnum) System.Threading.Thread.GetData(enumerators);
    if (cached != null)
    {
        return cached;
    }

    // Nothing stored for this thread yet: take a fresh clone and remember it.
    SegmentTermEnum fresh = Terms();
    System.Threading.Thread.SetData(enumerators, fresh);
    return fresh;
}
/// <summary>Loads the term index from the ".tii" enumerator into indexTerms,
/// indexInfos and indexPointers the first time it is called; later calls return
/// immediately. The index enumerator is closed and dropped afterwards.</summary>
private void EnsureIndexIsRead()
{
    lock (this)
    {
        // A non-null indexTerms means the index was already read.
        if (indexTerms == null)
        {
            try
            {
                int entryCount = (int) indexEnum.size;

                indexTerms = new Term[entryCount];
                indexInfos = new TermInfo[entryCount];
                indexPointers = new long[entryCount];

                int slot = 0;
                while (indexEnum.Next())
                {
                    indexTerms[slot] = indexEnum.Term();
                    indexInfos[slot] = indexEnum.TermInfo();
                    indexPointers[slot] = indexEnum.indexPointer;
                    slot++;
                }
            }
            finally
            {
                // Runs whether or not the read succeeded.
                indexEnum.Close();
                indexEnum = null;
            }
        }
    }
}
/// <summary>Binary-searches indexTerms and returns the offset of the greatest
/// index entry that is less than or equal to term.</summary>
private int GetIndexOffset(Term term)
{
    int low = 0;
    int high = indexTerms.Length - 1;

    while (low <= high)
    {
        int probe = (low + high) >> 1;
        int cmp = term.CompareTo(indexTerms[probe]);

        if (cmp == 0)
        {
            return probe; // exact hit
        }

        if (cmp < 0)
        {
            high = probe - 1;
        }
        else
        {
            low = probe + 1;
        }
    }

    // No exact match: high now sits just below the insertion point.
    return high;
}
/// <summary>Seeks the calling thread's enumerator to the index entry at indexOffset.</summary>
private void SeekEnum(int indexOffset)
{
    SegmentTermEnum cursor = GetEnum();
    // Position passed is one before the first term of the block.
    cursor.Seek(
        indexPointers[indexOffset],
        (indexOffset * cursor.indexInterval) - 1,
        indexTerms[indexOffset],
        indexInfos[indexOffset]);
}
/// <summary>Looks up the TermInfo for a Term in the set; returns null when the
/// set is empty or the term is not found.</summary>
public /*internal*/ TermInfo Get(Term term)
{
    if (size == 0)
    {
        return null;
    }

    EnsureIndexIsRead();

    // Sequential-access shortcut: if the thread's enumerator is positioned such
    // that the term lies at or after it, and before the next index entry,
    // scan forward from where it is instead of seeking.
    SegmentTermEnum cursor = GetEnum();
    if (cursor.Term() != null)
    {
        bool atOrAfterCursor =
            (cursor.Prev() != null && term.CompareTo(cursor.Prev()) > 0)
            || term.CompareTo(cursor.Term()) >= 0;

        if (atOrAfterCursor)
        {
            int nextIndexEntry = (int) (cursor.position / cursor.indexInterval) + 1;
            if (indexTerms.Length == nextIndexEntry || term.CompareTo(indexTerms[nextIndexEntry]) < 0)
            {
                return ScanEnum(term);
            }
        }
    }

    // Random access: seek to the index entry at or before the term, then scan.
    SeekEnum(GetIndexOffset(term));
    return ScanEnum(term);
}
/// <summary>Advances the calling thread's enumerator to term via ScanTo and
/// returns its TermInfo when the enumerator lands exactly on term, otherwise null.</summary>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum cursor = GetEnum();
    cursor.ScanTo(term);

    bool found = cursor.Term() != null && term.CompareTo(cursor.Term()) == 0;
    return found ? cursor.TermInfo() : null;
}
/// <summary>Returns the nth term in the set, or null when the set is empty or
/// the enumerator runs out before reaching position.</summary>
/// <param name="position">Ordinal position of the wanted term.</param>
internal Term Get(int position)
{
    if (size == 0)
    {
        return null;
    }

    SegmentTermEnum enumerator = GetEnum();

    // If the enumerator already sits at or before the wanted position, and within
    // one index interval of it, scan forward without seeking.
    if (enumerator.Term() != null
        && position >= enumerator.position
        && position < (enumerator.position + enumerator.indexInterval))
    {
        return ScanEnum(position);
    }

    // SeekEnum reads indexPointers/indexTerms/indexInfos, which are null until
    // the index has been loaded; make sure it is before seeking.
    EnsureIndexIsRead();
    SeekEnum(position / enumerator.indexInterval);
    return ScanEnum(position);
}
/// <summary>Steps the calling thread's enumerator forward until it reaches
/// position and returns the term there; returns null if the enumerator is
/// exhausted first.</summary>
private Term ScanEnum(int position)
{
    SegmentTermEnum cursor = GetEnum();
    for (;;)
    {
        if (cursor.position >= position)
        {
            return cursor.Term();
        }

        if (!cursor.Next())
        {
            return null;
        }
    }
}
/// <summary>Returns the position of a Term in the set, or -1 when the set is
/// empty or the term is not present.</summary>
/// <param name="term">Term to locate.</param>
internal long GetPosition(Term term)
{
    if (size == 0)
    {
        return -1;
    }

    EnsureIndexIsRead();
    int indexOffset = GetIndexOffset(term);
    SeekEnum(indexOffset);

    // Walk forward from the index entry until the enumerator reaches or passes
    // the term, or runs out of entries.
    SegmentTermEnum enumerator = GetEnum();
    while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
    {
    }

    // Guard against a null current term the same way ScanEnum(Term) does
    // (presumably the state after Next() returns false - confirm against
    // SegmentTermEnum); a null current term means the term was not found.
    Term current = enumerator.Term();
    if (current != null && term.CompareTo(current) == 0)
    {
        return enumerator.position;
    }

    return -1;
}
/// <summary>Returns an enumeration over every Term and TermInfo in the set,
/// produced by cloning the ".tis" enumerator.</summary>
public SegmentTermEnum Terms()
{
    System.Object copy = origEnum.Clone();
    return (SegmentTermEnum) copy;
}
/// <summary>Returns an enumeration of terms starting at or after the named term.</summary>
public SegmentTermEnum Terms(Term term)
{
    // Called only to position the calling thread's enumerator; the returned
    // TermInfo is not needed. For an empty set Get returns without positioning.
    Get(term);

    SegmentTermEnum positioned = GetEnum();
    return (SegmentTermEnum) positioned.Clone();
}