Fixed #374055:Only the first "tag" is detected in digikam.
[beagle.git] / beagled / Lucene.Net / Index / TermInfosReader.cs
blob937024e1c9ea3567798dbd730ee3e3aaad78f25c
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Directory = Lucene.Net.Store.Directory;
20 namespace Lucene.Net.Index
23 /// <summary>This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
24 /// Directory. Pairs are accessed either by Term or by ordinal position in the
25 /// set.
26 /// </summary>
28 public sealed class TermInfosReader
30 private Directory directory; // directory the ".tis" and ".tii" inputs are opened from
31 private System.String segment; // segment name; prefix of the ".tis"/".tii" file names
32 private FieldInfos fieldInfos; // passed to every SegmentTermEnum this reader creates
34 private System.LocalDataStoreSlot enumerators = System.Threading.Thread.AllocateDataSlot(); // slot holding the calling thread's cached enumerator (see GetEnum)
35 private SegmentTermEnum origEnum; // enumerator over ".tis"; cloned by Terms(), closed by Close()
36 private long size; // copied from origEnum.size in the constructor
38 private Term[] indexTerms = null; // stays null until EnsureIndexIsRead() loads the ".tii" index
39 private TermInfo[] indexInfos; // TermInfo of each index entry, parallel to indexTerms
40 private long[] indexPointers; // indexPointer of each index entry, parallel to indexTerms
42 private SegmentTermEnum indexEnum; // enumerator over ".tii"; closed and set to null once the index has been read
/// <summary>Opens the term dictionary (".tis") and the term index (".tii") of a segment.</summary>
/// <param name="dir">Directory the segment files are opened from.</param>
/// <param name="seg">Segment name; used as the prefix of both file names.</param>
/// <param name="fis">Field infos handed to the term enumerators.</param>
public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
{
    bool success = false;
    try
    {
        directory = dir;
        segment = seg;
        fieldInfos = fis;

        origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
        size = origEnum.size;

        indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);

        success = true;
    }
    finally
    {
        // If ".tii" cannot be opened, the ".tis" enumerator is already open and
        // the caller never receives an instance it could Close(); release
        // whatever was opened before the exception propagates.
        if (!success)
        {
            Close();
        }
    }
}
56 /* Leaving this here will cause a memory leak under .NET 1.1
57 ~TermInfosReader()
59 // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
60 //System.Threading.Thread.SetData(enumerators, null);
/// <summary>Returns the skip interval reported by the ".tis" enumerator.</summary>
public int GetSkipInterval()
{
    int interval = origEnum.skipInterval;
    return interval;
}
/// <summary>Closes the term enumerator and, if it is still open, the index enumerator.</summary>
public /*internal*/ void Close()
{
    if (origEnum != null)
    {
        origEnum.Close();
    }

    // indexEnum is null once EnsureIndexIsRead() has consumed it.
    if (indexEnum != null)
    {
        indexEnum.Close();
    }
}
/// <summary>Number of term/value pairs held in the set.</summary>
/// <returns>The size captured from the term enumerator at construction time.</returns>
internal long Size()
{
    long pairCount = size;
    return pairCount;
}
/// <summary>
/// Returns the enumerator cached for the calling thread, creating it from
/// <c>Terms()</c> and storing it in the thread's data slot on first use.
/// </summary>
private SegmentTermEnum GetEnum()
{
    object cached = System.Threading.Thread.GetData(enumerators);
    if (cached != null)
    {
        return (SegmentTermEnum) cached;
    }

    SegmentTermEnum fresh = Terms();
    System.Threading.Thread.SetData(enumerators, fresh);
    return fresh;
}
/// <summary>
/// Loads the term index into <c>indexTerms</c>, <c>indexInfos</c> and
/// <c>indexPointers</c> the first time it is needed; later calls return
/// immediately. The index enumerator is closed and cleared afterwards.
/// </summary>
private void EnsureIndexIsRead()
{
    lock (this)
    {
        // A non-null indexTerms means the index has already been loaded.
        if (indexTerms == null)
        {
            try
            {
                int entryCount = (int) indexEnum.size;

                indexTerms = new Term[entryCount];
                indexInfos = new TermInfo[entryCount];
                indexPointers = new long[entryCount];

                int slot = 0;
                while (indexEnum.Next())
                {
                    indexTerms[slot] = indexEnum.Term();
                    indexInfos[slot] = indexEnum.TermInfo();
                    indexPointers[slot] = indexEnum.indexPointer;
                    slot++;
                }
            }
            finally
            {
                // The index enumerator is single-use: release it whether or
                // not the read succeeded.
                indexEnum.Close();
                indexEnum = null;
            }
        }
    }
}
/// <summary>
/// Binary-searches <c>indexTerms</c> and returns the offset of the greatest
/// index entry that is less than or equal to <paramref name="term"/>.
/// </summary>
/// <param name="term">Term to locate.</param>
/// <returns>The matching offset, or the offset just below the insertion point when there is no exact match.</returns>
private int GetIndexOffset(Term term)
{
    int low = 0;
    int high = indexTerms.Length - 1;

    while (low <= high)
    {
        int probe = (low + high) >> 1;
        int cmp = term.CompareTo(indexTerms[probe]);

        if (cmp == 0)
        {
            return probe;
        }

        if (cmp < 0)
        {
            high = probe - 1;
        }
        else
        {
            low = probe + 1;
        }
    }

    // No exact hit: high now sits on the last entry smaller than term.
    return high;
}
/// <summary>Positions the calling thread's enumerator on the given index entry.</summary>
/// <param name="indexOffset">Offset into <c>indexTerms</c>, <c>indexInfos</c> and <c>indexPointers</c>.</param>
private void SeekEnum(int indexOffset)
{
    SegmentTermEnum cursor = GetEnum();
    cursor.Seek(
        indexPointers[indexOffset],
        (indexOffset * cursor.indexInterval) - 1,
        indexTerms[indexOffset],
        indexInfos[indexOffset]);
}
/// <summary>Looks up the TermInfo stored for a Term.</summary>
/// <param name="term">Term to look up.</param>
/// <returns>The TermInfo for <paramref name="term"/>, or null when the set is empty or the term is absent.</returns>
public /*internal*/ TermInfo Get(Term term)
{
    if (size == 0)
    {
        return null;
    }

    EnsureIndexIsRead();

    // Sequential-access shortcut: if the requested term lies at or after the
    // cached enumerator's position and before the next index entry, scan
    // forward from where the enumerator already is instead of seeking.
    SegmentTermEnum cursor = GetEnum();
    if (cursor.Term() != null)
    {
        bool afterPrevious = cursor.Prev() != null && term.CompareTo(cursor.Prev()) > 0;
        if (afterPrevious || term.CompareTo(cursor.Term()) >= 0)
        {
            int nextIndexEntry = (int) (cursor.position / cursor.indexInterval) + 1;
            if (indexTerms.Length == nextIndexEntry || term.CompareTo(indexTerms[nextIndexEntry]) < 0)
            {
                return ScanEnum(term);
            }
        }
    }

    // Random access: jump to the nearest index entry, then scan.
    SeekEnum(GetIndexOffset(term));
    return ScanEnum(term);
}
/// <summary>Scans forward from the enumerator's current position for a matching term.</summary>
/// <param name="term">Term to scan to.</param>
/// <returns>The TermInfo at the enumerator when it lands exactly on <paramref name="term"/>; otherwise null.</returns>
private TermInfo ScanEnum(Term term)
{
    SegmentTermEnum cursor = GetEnum();
    cursor.ScanTo(term);

    bool found = cursor.Term() != null && term.CompareTo(cursor.Term()) == 0;
    if (!found)
    {
        return null;
    }
    return cursor.TermInfo();
}
/// <summary>Returns the nth term in the set.</summary>
/// <param name="position">Ordinal position of the requested term.</param>
/// <returns>The term at <paramref name="position"/>, or null when the set is empty or the enumerator runs out before reaching it.</returns>
internal Term Get(int position)
{
    if (size == 0)
    {
        return null;
    }

    SegmentTermEnum enumerator = GetEnum();

    // Fast path: the target lies within one index interval ahead of the
    // enumerator's current position, so scanning forward is enough.
    if (enumerator.Term() != null
        && position >= enumerator.position
        && position < (enumerator.position + enumerator.indexInterval))
    {
        return ScanEnum(position);
    }

    // SeekEnum reads indexPointers/indexTerms/indexInfos, which stay null
    // until the index has been loaded; make sure that has happened first.
    EnsureIndexIsRead();

    SeekEnum(position / enumerator.indexInterval);
    return ScanEnum(position);
}
/// <summary>Advances the calling thread's enumerator until it reaches the given position.</summary>
/// <param name="position">Ordinal position to advance to.</param>
/// <returns>The term at the enumerator once its position is no longer below <paramref name="position"/>, or null if the enumerator is exhausted first.</returns>
private Term ScanEnum(int position)
{
    SegmentTermEnum cursor = GetEnum();
    while (true)
    {
        if (cursor.position >= position)
        {
            return cursor.Term();
        }
        if (!cursor.Next())
        {
            return null;
        }
    }
}
/// <summary>Returns the position of a Term in the set or -1.</summary>
/// <param name="term">Term to locate.</param>
/// <returns>The enumerator position of <paramref name="term"/>, or -1 when the set is empty or the term is not present.</returns>
internal long GetPosition(Term term)
{
    if (size == 0)
    {
        return -1;
    }

    EnsureIndexIsRead();
    int indexOffset = GetIndexOffset(term);
    SeekEnum(indexOffset);

    // Walk forward until the enumerator reaches or passes the term, or runs out.
    SegmentTermEnum enumerator = GetEnum();
    while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
    {
    }

    // The enumerator's current term can be null (the other lookups in this
    // class guard against it), which is what is expected once Next() has
    // failed; treat that as "not found" instead of comparing against null.
    Term current = enumerator.Term();
    if (current != null && term.CompareTo(current) == 0)
    {
        return enumerator.position;
    }
    return -1;
}
/// <summary>Creates an enumeration over all Terms and TermInfos in the set.</summary>
/// <returns>A clone of the ".tis" enumerator.</returns>
public SegmentTermEnum Terms()
{
    SegmentTermEnum copy = (SegmentTermEnum) origEnum.Clone();
    return copy;
}
/// <summary>Creates an enumeration of terms starting at or after the named term.</summary>
/// <param name="term">Term to position the enumeration on.</param>
/// <returns>A clone of the calling thread's enumerator after it has been positioned by a lookup of <paramref name="term"/>.</returns>
public SegmentTermEnum Terms(Term term)
{
    // The lookup result is discarded; the call is made only for its effect
    // on the thread's cached enumerator.
    Get(term);

    SegmentTermEnum positioned = GetEnum();
    return (SegmentTermEnum) positioned.Clone();
}