Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Index / SegmentReader.cs
blob a4fca4b7235c09de23a038932bce66df75a1b81b
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Directory = Lucene.Net.Store.Directory;
19 using InputStream = Lucene.Net.Store.InputStream;
20 using OutputStream = Lucene.Net.Store.OutputStream;
21 using BitVector = Lucene.Net.Util.BitVector;
22 namespace Lucene.Net.Index
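25 /// <summary> An <code>IndexReader</code> over a single index segment. The segment's
26 /// files are opened from the regular directory or, when a <code>.cfs</code> file exists
27 /// for the segment, from that compound file. </summary>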
28 /// <version> $Id: SegmentReader.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $
29 /// </version>
30 sealed public class SegmentReader : IndexReader
32 private System.String segment;
34 internal FieldInfos fieldInfos;
35 private FieldsReader fieldsReader;
37 internal TermInfosReader tis;
38 internal TermVectorsReader termVectorsReader;
40 internal BitVector deletedDocs = null;
41 private bool deletedDocsDirty = false;
42 private bool normsDirty = false;
43 private bool undeleteAll = false;
45 internal InputStream freqStream;
46 internal InputStream proxStream;
48 // Compound File Reader when based on a compound file segment
49 internal CompoundFileReader cfsReader;
/// <summary>State kept for the norms of one field: the stream they are read from,
/// the cached bytes (null until loaded), a dirty flag and the number used in the
/// norms file name.</summary>
private class Norm
{
    private SegmentReader enclosingInstance;

    public InputStream in_Renamed; // stream the norms are read from
    public byte[] bytes;           // cached norm bytes, null until read
    public bool dirty;             // true once the cached bytes were modified
    public int number;             // suffix of the "<segment>.f<number>" file

    /// <summary>Creates the norm entry for one field of <c>enclosingInstance</c>.</summary>
    public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
    {
        this.enclosingInstance = enclosingInstance;
        this.in_Renamed = in_Renamed;
        this.number = number;
    }

    /// <summary>The reader this entry belongs to.</summary>
    public SegmentReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Writes the cached bytes to "&lt;segment&gt;.tmp" in the reader's regular
    /// directory (not the compound file), renames that file to
    /// "&lt;segment&gt;.f&lt;number&gt;" and clears the dirty flag.</summary>
    public void ReWrite()
    {
        SegmentReader owner = Enclosing_Instance;
        System.String tempName = owner.segment + ".tmp";

        OutputStream out_Renamed = owner.Directory().CreateFile(tempName);
        try
        {
            out_Renamed.WriteBytes(bytes, owner.MaxDoc());
        }
        finally
        {
            // close the temporary file even when the write fails
            out_Renamed.Close();
        }

        System.String fileName = owner.segment + ".f" + number;
        owner.Directory().RenameFile(tempName, fileName);
        dirty = false;
    }
}
96 private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
/// <summary>Creates a reader for segment <c>si</c>, passing the segment's directory,
/// <c>sis</c> and <c>closeDir</c> on to the base reader before opening the segment files.</summary>
public /*internal*/ SegmentReader(SegmentInfos sis, SegmentInfo si, bool closeDir)
    : base(si.dir, sis, closeDir)
{
    this.Initialize(si);
}
/// <summary>Creates a reader for segment <c>si</c> on the segment's own directory
/// and opens the segment files.</summary>
public /*internal*/ SegmentReader(SegmentInfo si)
    : base(si.dir)
{
    this.Initialize(si);
}
/// <summary>Opens the files of segment <c>si</c> and stores the resulting readers and
/// streams in this instance's fields.</summary>
/// <param name="si">The segment to open; its name becomes this reader's segment name.</param>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Start from the regular directory and switch to the compound file
    // reader when a "<segment>.cfs" file is present.
    Directory cfsDir = Directory();
    System.String compoundName = segment + ".cfs";
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        cfsDir = cfsReader;
    }

    // Field metadata, stored fields and the term dictionary all come from cfsDir.
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // The deletions bit vector always lives in the regular directory, never in the cfs.
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // Open the remaining index files now and keep them open, so they stay
    // available to this reader even if an index update removes them.
    freqStream = cfsDir.OpenFile(segment + ".frq");
    proxStream = cfsDir.OpenFile(segment + ".prx");
    OpenNorms(cfsDir);

    // Term vector files are opened only when some field stores vectors.
    if (!fieldInfos.HasVectors())
    {
        return;
    }
    termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
}
/// <summary>Persists pending changes: rewrites the deletions file when deletions are
/// dirty, removes it after an undelete-all, rewrites every dirty norm, and finally
/// resets all three pending-change flags.</summary>
protected internal override void DoCommit()
{
    System.String deletionsName = segment + ".del";

    if (deletedDocsDirty)
    {
        // write to a temporary file first, then rename it over the ".del" file
        System.String tempName = segment + ".tmp";
        deletedDocs.Write(Directory(), tempName);
        Directory().RenameFile(tempName, deletionsName);
    }

    if (undeleteAll)
    {
        if (Directory().FileExists(deletionsName))
        {
            Directory().DeleteFile(deletionsName);
        }
    }

    if (normsDirty)
    {
        foreach (Norm norm in norms.Values)
        {
            if (norm.dirty)
            {
                norm.ReWrite();
            }
        }
    }

    undeleteAll = false;
    normsDirty = false;
    deletedDocsDirty = false;
}
/// <summary>Closes the readers and streams held by this instance. The stored-fields
/// reader and term dictionary are closed unconditionally; the frequency and position
/// streams, the term vector reader and the compound file reader only when present.</summary>
protected internal override void DoClose()
{
    fieldsReader.Close();
    tis.Close();

    if (null != freqStream)
    {
        freqStream.Close();
    }
    if (null != proxStream)
    {
        proxStream.Close();
    }

    CloseNorms();

    if (null != termVectorsReader)
    {
        termVectorsReader.Close();
    }

    // the compound file reader goes last
    if (null != cfsReader)
    {
        cfsReader.Close();
    }
}
/// <summary>Tells whether a "&lt;name&gt;.del" file exists in the directory of segment <c>si</c>.</summary>
internal static bool HasDeletions(SegmentInfo si)
{
    System.String deletionsName = si.name + ".del";
    return si.dir.FileExists(deletionsName);
}
/// <summary>Tells whether this reader currently holds a deletions bit vector.</summary>
public override bool HasDeletions()
{
    if (deletedDocs == null)
    {
        return false;
    }
    return true;
}
/// <summary>Tells whether a "&lt;name&gt;.cfs" file exists in the directory of segment <c>si</c>.</summary>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    System.String compoundName = si.name + ".cfs";
    return si.dir.FileExists(compoundName);
}
/// <summary>Tells whether the directory of segment <c>si</c> lists a separately stored
/// norms file, i.e. a file whose name starts with "&lt;name&gt;.f" immediately followed
/// by a digit.</summary>
/// <param name="si">Segment whose directory listing is scanned.</param>
/// <returns><c>true</c> when at least one such file is listed, otherwise <c>false</c>.</returns>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    System.String[] result = si.dir.List();
    System.String pattern = si.name + ".f";
    int patternLength = pattern.Length;

    // Every listed file has to be examined; a bound of 0 would skip the scan
    // entirely and make this method answer false for every segment.
    for (int i = 0; i < result.Length; i++)
    {
        System.String fileName = result[i];

        // The length check keeps the indexer in range for a file that is
        // named exactly "<name>.f" with nothing after the pattern.
        if (fileName.Length > patternLength
            && fileName.StartsWith(pattern)
            && System.Char.IsDigit(fileName[patternLength]))
        {
            return true;
        }
    }
    return false;
}
/// <summary>Marks document <c>docNum</c> as deleted, creating the deletions bit vector
/// (sized by <c>MaxDoc()</c>) on first use, and flags the deletions as dirty.</summary>
protected internal override void DoDelete(int docNum)
{
    if (null == deletedDocs)
    {
        deletedDocs = new BitVector(MaxDoc());
    }

    // a new deletion cancels any pending undelete-all
    deletedDocsDirty = true;
    undeleteAll = false;

    deletedDocs.Set(docNum);
}
/// <summary>Drops the in-memory deletions and records that an undelete-all is pending,
/// so the next commit can remove the deletions file.</summary>
protected internal override void DoUndeleteAll()
{
    undeleteAll = true;
    deletedDocsDirty = false;
    deletedDocs = null;
}
/// <summary>Builds the list of file names belonging to this segment.</summary>
/// <returns>A synchronized list holding "&lt;segment&gt;.&lt;ext&gt;" for every known
/// extension whose file exists in the directory, followed by "&lt;segment&gt;.f&lt;i&gt;"
/// for every indexed field (these are added without an existence check).</returns>
internal System.Collections.ArrayList Files()
{
    System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
    System.String[] ext = new System.String[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};

    foreach (System.String extension in ext)
    {
        System.String name = segment + "." + extension;
        if (Directory().FileExists(name))
        {
            files.Add(name);
        }
    }

    int fieldIndex = 0;
    while (fieldIndex < fieldInfos.Size())
    {
        FieldInfo fi = fieldInfos.FieldInfo(fieldIndex);
        if (fi.isIndexed)
        {
            files.Add(segment + ".f" + fieldIndex);
        }
        fieldIndex++;
    }

    return files;
}
/// <summary>Returns the term enumeration obtained from the term infos reader.</summary>
public override TermEnum Terms()
{
    TermEnum allTerms = tis.Terms();
    return allTerms;
}
/// <summary>Returns the term enumeration the term infos reader produces for <c>t</c>.</summary>
public override TermEnum Terms(Term t)
{
    TermEnum termsFrom = tis.Terms(t);
    return termsFrom;
}
/// <summary>Returns the stored fields of document <c>n</c>.</summary>
/// <exception cref="System.ArgumentException">Document <c>n</c> is marked deleted.</exception>
public override Document Document(int n)
{
    lock (this)
    {
        if (!IsDeleted(n))
        {
            return fieldsReader.Doc(n);
        }
        throw new System.ArgumentException("attempt to access a deleted document");
    }
}
/// <summary>Tells whether document <c>n</c> is set in the deletions bit vector;
/// always false when no bit vector is held.</summary>
public override bool IsDeleted(int n)
{
    lock (this)
    {
        if (deletedDocs == null)
        {
            return false;
        }
        return deletedDocs.Get(n);
    }
}
/// <summary>Creates a new <c>SegmentTermDocs</c> bound to this reader.</summary>
public override TermDocs TermDocs()
{
    SegmentTermDocs docs = new SegmentTermDocs(this);
    return docs;
}
/// <summary>Creates a new <c>SegmentTermPositions</c> bound to this reader.</summary>
public override TermPositions TermPositions()
{
    SegmentTermPositions positions = new SegmentTermPositions(this);
    return positions;
}
/// <summary>Returns the document frequency recorded for term <c>t</c>, or 0 when the
/// term infos reader has no entry for it.</summary>
public override int DocFreq(Term t)
{
    TermInfo info = tis.Get(t);
    return (info == null) ? 0 : info.docFreq;
}
/// <summary>Returns <c>MaxDoc()</c> minus the count of the deletions bit vector
/// (when one is held).</summary>
public override int NumDocs()
{
    int total = MaxDoc();
    if (deletedDocs == null)
    {
        return total;
    }
    return total - deletedDocs.Count();
}
/// <summary>Returns the size reported by the stored-fields reader.</summary>
public override int MaxDoc()
{
    int size = fieldsReader.Size();
    return size;
}
/// <summary>Collects the names of all fields known to this segment.</summary>
/// <returns>A hashtable in which every field name is stored as both key and value.</returns>
/// <seealso cref="IndexReader.GetFieldNames()"/>
public override System.Collections.ICollection GetFieldNames()
{
    // the hashtable serves as a set of field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();

    int index = 0;
    while (index < fieldInfos.Size())
    {
        System.String fieldName = fieldInfos.FieldInfo(index).name;
        fieldSet.Add(fieldName, fieldName);
        index++;
    }

    return fieldSet;
}
/// <summary>Collects the names of the fields whose indexed flag equals <c>indexed</c>.</summary>
/// <param name="indexed">Value the field's <c>isIndexed</c> flag must have to be included.</param>
/// <returns>A hashtable in which every matching field name is stored as both key and value.</returns>
/// <seealso cref="IndexReader.GetFieldNames(bool)"/>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    // the hashtable serves as a set of field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();

    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(index);
        if (fi.isIndexed != indexed)
        {
            continue;
        }
        fieldSet.Add(fi.name, fi.name);
    }

    return fieldSet;
}
/// <summary>Collects the names of the indexed fields whose term vector flag equals
/// <c>storedTermVector</c>.</summary>
/// <param name="storedTermVector">If true, only indexed fields that store term vectors
/// are returned; otherwise only indexed fields that do not.</param>
/// <returns>A hashtable in which every matching field name is stored as both key and value.</returns>
public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
{
    // the hashtable serves as a set of field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();

    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(index);
        if (!fi.isIndexed)
        {
            continue;
        }
        if (fi.storeTermVector != storedTermVector)
        {
            continue;
        }
        fieldSet.Add(fi.name, fi.name);
    }

    return fieldSet;
}
/// <summary>Returns the norm bytes of <c>field</c>, reading and caching them on first use.</summary>
/// <returns>The cached byte array (one slot per <c>MaxDoc()</c>), or null when no norm
/// entry exists for the field.</returns>
public override byte[] Norms(System.String field)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];
        if (entry == null)
        {
            // no norms were opened for this field (it is not indexed)
            return null;
        }

        if (entry.bytes != null)
        {
            return entry.bytes;
        }

        // first request: read the values and keep them for later calls
        byte[] loaded = new byte[MaxDoc()];
        Norms(field, loaded, 0);
        entry.bytes = loaded;
        return loaded;
    }
}
/// <summary>Stores <c>value_Renamed</c> as the norm of document <c>doc</c> for
/// <c>field</c> and marks the field's norms dirty. Does nothing when no norm entry
/// exists for the field.</summary>
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
    Norm entry = (Norm) norms[field];
    if (entry != null)
    {
        // flag both the entry and the reader so the next commit rewrites the norms
        entry.dirty = true;
        normsDirty = true;

        byte[] values = Norms(field);
        values[doc] = value_Renamed;
    }
}
/// <summary>Copies the norms of <c>field</c> into a pre-allocated array.</summary>
/// <param name="field">Field whose norms are wanted.</param>
/// <param name="bytes">Destination array; left untouched when the field has no norm entry.</param>
/// <param name="offset">Index in <c>bytes</c> at which the <c>MaxDoc()</c> values start.</param>
public override void Norms(System.String field, byte[] bytes, int offset)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];
        if (entry == null)
        {
            return; // caller keeps whatever the array already holds
        }

        if (entry.bytes == null)
        {
            // nothing cached yet: read through a clone of the field's norm stream
            InputStream normStream = (InputStream) entry.in_Renamed.Clone();
            try
            {
                normStream.Seek(0);
                normStream.ReadBytes(bytes, offset, MaxDoc());
            }
            finally
            {
                normStream.Close();
            }
        }
        else
        {
            // serve the request from the cached copy
            Array.Copy(entry.bytes, 0, bytes, offset, MaxDoc());
        }
    }
}
/// <summary>Opens the norms file of every indexed field and registers a <c>Norm</c>
/// entry for it under the field's name.</summary>
/// <param name="cfsDir">Directory used when the norms file is not present in the
/// regular directory.</param>
private void OpenNorms(Directory cfsDir)
{
    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(index);
        if (!fi.isIndexed)
        {
            continue;
        }

        System.String fileName = segment + ".f" + fi.number;

        // a file in the regular directory (a re-written one) takes precedence over cfsDir
        Directory d;
        if (Directory().FileExists(fileName))
        {
            d = Directory();
        }
        else
        {
            d = cfsDir;
        }

        norms[fi.name] = new Norm(this, d.OpenFile(fileName), fi.number);
    }
}
/// <summary>Closes the input stream of every registered norm entry while holding the
/// norms table's sync root.</summary>
private void CloseNorms()
{
    lock (norms.SyncRoot)
    {
        foreach (Norm entry in norms.Values)
        {
            entry.in_Renamed.Close();
        }
    }
}
/// <summary>Return a term frequency vector for the specified document and Field. The
/// vector returned contains term numbers and frequencies for all terms in
/// the specified Field of this document, if the Field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
/// <param name="docNumber">Number of the document whose vector is wanted.</param>
/// <param name="field">Name of the field whose vector is wanted.</param>
/// <returns>The vector read by the term vectors reader, or null when the field is
/// unknown, does not store term vectors, or no term vectors reader is open.</returns>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Check if this Field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.FieldInfo(field);
    if (fi == null || !fi.storeTermVector)
        return null;

    // The reader is only created when the segment reports vectors; answer null
    // instead of dereferencing a missing reader, as GetTermFreqVectors does.
    if (termVectorsReader == null)
        return null;

    return termVectorsReader.Get(docNumber, field);
}
/// <summary>Returns the term frequency vectors of the specified document, one for
/// each vectorized Field, as read by the term vectors reader. Each vector contains
/// term numbers and frequencies for all terms in its Field.
/// Returns null when this segment has no term vectors reader.
/// </summary>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (termVectorsReader != null)
    {
        return termVectorsReader.Get(docNumber);
    }
    return null;
}