cvsimport
[beagle.git] / beagled / Lucene.Net / Index / SegmentReader.cs
blobbb812a707292766780edbb8191e10b6f5462eab7
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Document = Lucene.Net.Documents.Document;
19 using Field = Lucene.Net.Documents.Field;
20 using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
21 using Directory = Lucene.Net.Store.Directory;
22 using IndexInput = Lucene.Net.Store.IndexInput;
23 using IndexOutput = Lucene.Net.Store.IndexOutput;
24 using BitVector = Lucene.Net.Util.BitVector;
26 namespace Lucene.Net.Index
29 /// <version> $Id: SegmentReader.cs,v 1.8 2006/11/29 19:25:43 joeshaw Exp $
30 /// </version>
31 public class SegmentReader : IndexReader
33 private System.String segment;
35 internal FieldInfos fieldInfos;
36 private FieldsReader fieldsReader;
38 internal TermInfosReader tis;
39 internal TermVectorsReader termVectorsReaderOrig = null;
40 internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
42 internal BitVector deletedDocs = null;
43 private bool deletedDocsDirty = false;
44 private bool normsDirty = false;
45 private bool undeleteAll = false;
47 internal IndexInput freqStream;
48 internal IndexInput proxStream;
50 // Compound File Reader when based on a compound file segment
51 internal CompoundFileReader cfsReader = null;
/// <summary>Gets the field metadata held by this reader.</summary>
public FieldInfos FieldInfos
{
    get
    {
        return this.fieldInfos;
    }
}
/// <summary>
/// Norm state for one field: the input the norms are read from, an optional
/// in-memory copy of the bytes, a dirty flag and the field number used in
/// the norm file name.
/// </summary>
private class Norm
{
    private SegmentReader enclosingInstance;

    /// <summary>Input supplied to the constructor.</summary>
    public IndexInput in_Renamed;

    /// <summary>In-memory norm bytes written out by <c>ReWrite</c>.</summary>
    public byte[] bytes;

    /// <summary>Cleared by <c>ReWrite</c> once the bytes have been written.</summary>
    public bool dirty;

    /// <summary>Numeric suffix of the norm file name.</summary>
    public int number;

    /// <summary>Creates the norm entry for one field of the given reader.</summary>
    /// <param name="enclosingInstance">Reader that owns this entry.</param>
    /// <param name="in_Renamed">Input to read the norm bytes from.</param>
    /// <param name="number">Number appended to the norm file extension.</param>
    public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
    {
        this.enclosingInstance = enclosingInstance;
        this.in_Renamed = in_Renamed;
        this.number = number;
    }

    /// <summary>Gets the reader that owns this entry.</summary>
    public SegmentReader Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>
    /// Writes <c>bytes</c> to a temporary file, renames it to the norm file
    /// of this field and clears the dirty flag.
    /// </summary>
    public void ReWrite()
    {
        SegmentReader owner = Enclosing_Instance;
        string tempName = owner.segment + ".tmp";

        // NOTE: norms are re-written in regular directory, not cfs
        IndexOutput tempOutput = owner.Directory().CreateOutput(tempName);
        try
        {
            tempOutput.WriteBytes(bytes, owner.MaxDoc());
        }
        finally
        {
            tempOutput.Close();
        }

        // A different extension is used when the segment has a compound file.
        string extension = (owner.cfsReader == null) ? ".f" : ".s";
        owner.Directory().RenameFile(tempName, owner.segment + extension + number);
        this.dirty = false;
    }
}
110 private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
112 /// <summary>The class which implements SegmentReader. </summary>
113 private static System.Type IMPL;
/// <summary>
/// Creates an uninitialized reader, passing null to the base constructor.
/// The static <c>Get</c> factory instantiates the reader type reflectively
/// and then initializes the instance.
/// </summary>
public SegmentReader() : base(null)
{
}
/// <summary>
/// Opens a reader for <paramref name="si"/> in the segment's own directory,
/// with no <c>SegmentInfos</c> and with both <c>closeDir</c> and
/// <c>ownDir</c> false.
/// </summary>
public static SegmentReader Get(SegmentInfo si)
{
    SegmentReader reader = Get(si.dir, si, null, false, false);
    return reader;
}
/// <summary>
/// Opens a reader for <paramref name="si"/> in the segment's own directory,
/// forwarding <paramref name="sis"/> and <paramref name="closeDir"/> and
/// passing <c>ownDir</c> as true.
/// </summary>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    SegmentReader reader = Get(si.dir, si, sis, closeDir, true);
    return reader;
}
/// <summary>
/// Creates an instance of the configured reader type (<c>IMPL</c>) and
/// initializes it for the given segment.
/// </summary>
/// <param name="dir">Directory passed to <c>Init</c>.</param>
/// <param name="si">Segment to open.</param>
/// <param name="sis">Segment infos passed to <c>Init</c>; may be null.</param>
/// <param name="closeDir">Forwarded to <c>Init</c>.</param>
/// <param name="ownDir">Forwarded to <c>Init</c>.</param>
/// <returns>The initialized reader.</returns>
/// <exception cref="System.SystemException">
/// The reader type could not be instantiated; the original failure is
/// available through <c>InnerException</c>.
/// </exception>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
    SegmentReader instance;
    try
    {
        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        // Chain the cause so its type and stack trace are not reduced to text.
        throw new System.SystemException("cannot load SegmentReader class: " + e, e);
    }
    instance.Init(dir, sis, closeDir, ownDir);
    instance.Initialize(si);
    return instance;
}
/// <summary>
/// Opens the per-segment files: field infos, stored fields, term infos,
/// the deletions bit vector (if present), frequency and proximity streams,
/// norms and, when any field has vectors, the term vectors reader.
/// </summary>
/// <param name="si">Segment whose name selects the files to open.</param>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Read from the compound file when one exists; otherwise fall back to
    // the regular directory (multi-file format).
    Directory cfsDir = Directory();
    string compoundName = segment + ".cfs";
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        cfsDir = cfsReader;
    }

    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    // The term vectors reader is only created when some field has vectors.
    if (fieldInfos.HasVectors())
    {
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
180 /* Leaving this here will cause a memory leak under .NET 1.1
181 ~SegmentReader()
183 // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
184 //System.Threading.Thread.SetData(termVectorsLocal, null);
/// <summary>
/// Writes pending changes: rewrites the deletions file when deletions are
/// dirty, removes it after an undelete-all, rewrites every dirty norm, and
/// then clears all three pending flags.
/// </summary>
protected internal override void DoCommit()
{
    string deletionsName = segment + ".del";

    if (deletedDocsDirty)
    {
        // Write to a temporary name first, then rename over the ".del" file.
        string tempName = segment + ".tmp";
        deletedDocs.Write(Directory(), tempName);
        Directory().RenameFile(tempName, deletionsName);
    }

    if (undeleteAll && Directory().FileExists(deletionsName))
    {
        Directory().DeleteFile(deletionsName);
    }

    if (normsDirty)
    {
        foreach (Norm norm in norms.Values)
        {
            if (norm.dirty)
            {
                norm.ReWrite();
            }
        }
    }

    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
}
/// <summary>
/// Closes the stored-fields reader, term infos, frequency and proximity
/// streams, norm inputs, term vectors reader and finally the compound file
/// reader, skipping the optional ones that are null.
/// </summary>
protected internal override void DoClose()
{
    fieldsReader.Close();
    tis.Close();

    if (freqStream != null)
    {
        freqStream.Close();
    }

    if (proxStream != null)
    {
        proxStream.Close();
    }

    CloseNorms();

    if (termVectorsReaderOrig != null)
    {
        termVectorsReaderOrig.Close();
    }

    // Closed last, after the inputs opened above.
    if (cfsReader != null)
    {
        cfsReader.Close();
    }
}
/// <summary>Returns true when the segment's ".del" file exists in its directory.</summary>
internal static bool HasDeletions(SegmentInfo si)
{
    string deletionsName = si.name + ".del";
    return si.dir.FileExists(deletionsName);
}
/// <summary>Returns true when this reader holds a deletions bit vector.</summary>
public override bool HasDeletions()
{
    bool anyDeletions = (deletedDocs != null);
    return anyDeletions;
}
/// <summary>Returns true when the segment's ".cfs" file exists in its directory.</summary>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    string compoundName = si.name + ".cfs";
    return si.dir.FileExists(compoundName);
}
/// <summary>
/// Returns true when the segment's directory lists at least one file named
/// <c>&lt;segment&gt;.s</c> followed immediately by a digit.
/// </summary>
/// <param name="si">Segment whose directory listing is scanned.</param>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    System.String[] fileNames = si.dir.List();
    System.String pattern = si.name + ".s";
    int patternLength = pattern.Length;
    for (int i = 0; i < fileNames.Length; i++)
    {
        System.String fileName = fileNames[i];

        // The name must be longer than the prefix; otherwise indexing at
        // patternLength would throw for a file named exactly "<segment>.s".
        if (fileName.Length <= patternLength)
        {
            continue;
        }

        // File names are compared ordinally, not by the current culture.
        if (System.String.CompareOrdinal(fileName, 0, pattern, 0, patternLength) == 0
            && System.Char.IsDigit(fileName[patternLength]))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Marks document <paramref name="docNum"/> as deleted, creating the
/// deletions bit vector on first use and flagging deletions as dirty.
/// </summary>
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
    {
        deletedDocs = new BitVector(MaxDoc());
    }

    // A new deletion cancels any pending undelete-all.
    deletedDocsDirty = true;
    undeleteAll = false;

    deletedDocs.Set(docNum);
}
/// <summary>
/// Drops the in-memory deletions and records that an undelete-all is
/// pending, so the next commit can remove the ".del" file.
/// </summary>
protected internal override void DoUndeleteAll()
{
    this.deletedDocs = null;
    this.deletedDocsDirty = false;
    this.undeleteAll = true;
}
/// <summary>
/// Lists the files of this segment that exist in the regular directory:
/// one candidate per known index extension, plus the norm file of every
/// indexed field that does not omit norms.
/// </summary>
/// <returns>A synchronized list of file names.</returns>
internal virtual System.Collections.ArrayList Files()
{
    System.Collections.ArrayList found = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));

    for (int ext = 0; ext < IndexFileNames.INDEX_EXTENSIONS.Length; ext++)
    {
        string candidate = segment + "." + IndexFileNames.INDEX_EXTENSIONS[ext];
        if (Directory().FileExists(candidate))
        {
            found.Add(candidate);
        }
    }

    for (int fieldIndex = 0; fieldIndex < fieldInfos.Size(); fieldIndex++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(fieldIndex);
        if (!fi.isIndexed || fi.omitNorms)
        {
            continue;
        }

        // ".f" without a compound file, ".s" with one.
        string normExtension = (cfsReader == null) ? ".f" : ".s";
        string normName = segment + normExtension + fieldIndex;
        if (Directory().FileExists(normName))
        {
            found.Add(normName);
        }
    }

    return found;
}
/// <summary>Returns the term enumeration provided by the term infos reader.</summary>
public override TermEnum Terms()
{
    TermEnum allTerms = tis.Terms();
    return allTerms;
}
/// <summary>
/// Returns the term enumeration the term infos reader provides for
/// <paramref name="t"/>.
/// </summary>
public override TermEnum Terms(Term t)
{
    TermEnum termsFrom = tis.Terms(t);
    return termsFrom;
}
/// <summary>Returns the stored fields of document <paramref name="n"/>.</summary>
/// <exception cref="System.ArgumentException">The document is marked deleted.</exception>
public override Document Document(int n)
{
    lock (this)
    {
        if (!IsDeleted(n))
        {
            return fieldsReader.Doc(n);
        }

        throw new System.ArgumentException("attempt to access a deleted document");
    }
}
/// <summary>
/// Returns true when a deletions bit vector exists and has bit
/// <paramref name="n"/> set.
/// </summary>
public override bool IsDeleted(int n)
{
    lock (this)
    {
        if (deletedDocs == null)
        {
            return false;
        }

        return deletedDocs.Get(n);
    }
}
/// <summary>Creates a new <c>SegmentTermDocs</c> bound to this reader.</summary>
public override TermDocs TermDocs()
{
    SegmentReader owner = this;
    return new SegmentTermDocs(owner);
}
/// <summary>Creates a new <c>SegmentTermPositions</c> bound to this reader.</summary>
public override TermPositions TermPositions()
{
    SegmentReader owner = this;
    return new SegmentTermPositions(owner);
}
/// <summary>
/// Returns the document frequency recorded for <paramref name="t"/>, or 0
/// when the term infos reader has no entry for it.
/// </summary>
public override int DocFreq(Term t)
{
    TermInfo info = tis.Get(t);
    return (info == null) ? 0 : info.docFreq;
}
/// <summary>
/// Returns <c>MaxDoc()</c> minus the number of bits set in the deletions
/// vector, when one exists.
/// </summary>
public override int NumDocs()
{
    int total = MaxDoc();
    if (deletedDocs == null)
    {
        return total;
    }

    return total - deletedDocs.Count();
}
/// <summary>Returns the size reported by the stored-fields reader.</summary>
public override int MaxDoc()
{
    int size = fieldsReader.Size();
    return size;
}
/// <summary>
/// Returns the names of all fields in this segment as a hashtable that maps
/// each name to itself.
/// </summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
public override System.Collections.ICollection GetFieldNames()
{
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    int fieldCount = fieldInfos.Size();
    int index = 0;
    while (index < fieldCount)
    {
        string fieldName = fieldInfos.FieldInfo(index).name;
        names.Add(fieldName, fieldName);
        index++;
    }

    return names;
}
/// <summary>
/// Returns the names of the fields whose indexed flag equals
/// <paramref name="indexed"/>, as a hashtable that maps each name to itself.
/// </summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo info = fieldInfos.FieldInfo(index);
        if (info.isIndexed != indexed)
        {
            continue;
        }

        names.Add(info.name, info.name);
    }

    return names;
}
/// <summary>
/// Returns the names of the indexed fields whose term vector flags match
/// <paramref name="tvSpec"/> exactly, as a hashtable that maps each name to
/// itself.
/// </summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
/// <exception cref="System.ArgumentException">
/// <paramref name="tvSpec"/> is not one of the known term vector settings.
/// </exception>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    bool isNo = (tvSpec == Field.TermVector.NO);
    bool isYes = (tvSpec == Field.TermVector.YES);
    bool isPositions = (tvSpec == Field.TermVector.WITH_POSITIONS);
    bool isOffsets = (tvSpec == Field.TermVector.WITH_OFFSETS);
    bool isPositionsOffsets = (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS);

    if (!(isNo || isYes || isPositions || isOffsets || isPositionsOffsets))
    {
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
    }

    // Translate the setting into the three flags stored on each FieldInfo.
    bool wantVector = !isNo;
    bool wantPositions = isPositions || isPositionsOffsets;
    bool wantOffsets = isOffsets || isPositionsOffsets;

    System.Collections.Hashtable names = new System.Collections.Hashtable();
    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo info = fieldInfos.FieldInfo(index);
        bool matches = info.isIndexed
            && info.storeTermVector == wantVector
            && info.storePositionWithTermVector == wantPositions
            && info.storeOffsetWithTermVector == wantOffsets;
        if (matches)
        {
            names.Add(info.name, info.name);
        }
    }

    return names;
}
/// <summary>
/// Returns the names of the fields selected by
/// <paramref name="fieldOption"/>, as a hashtable that maps each name to
/// itself.
/// </summary>
/// <param name="fieldOption">Which category of fields to return.</param>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
{
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo info = fieldInfos.FieldInfo(index);

        // Each alternative pairs one option value with the flags it requires.
        bool selected =
            fieldOption == IndexReader.FieldOption.ALL
            || (fieldOption == IndexReader.FieldOption.UNINDEXED && !info.isIndexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED && info.isIndexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR && info.isIndexed && !info.storeTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR && info.storeTermVector && !info.storePositionWithTermVector && !info.storeOffsetWithTermVector)
            || (fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR && info.isIndexed && info.storeTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION && info.storePositionWithTermVector && !info.storeOffsetWithTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET && info.storeOffsetWithTermVector && !info.storePositionWithTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && info.storeOffsetWithTermVector && info.storePositionWithTermVector);

        if (selected)
        {
            names.Add(info.name, info.name);
        }
    }

    return names;
}
/// <summary>
/// Returns true when the norms table has an entry for
/// <paramref name="field"/>.
/// </summary>
public override bool HasNorms(System.String field)
{
    lock (this)
    {
        bool present = norms.ContainsKey(field);
        return present;
    }
}
/// <summary>
/// Builds an array of <paramref name="size"/> bytes, each set to the
/// encoded norm for 1.0.
/// </summary>
internal static byte[] CreateFakeNorms(int size)
{
    byte[] filled = new byte[size];
    byte encodedOne = DefaultSimilarity.EncodeNorm(1.0f);

    int position = 0;
    while (position < size)
    {
        filled[position] = encodedOne;
        position++;
    }

    return filled;
}
// Fake norms array, created on first use by FakeNorms().
private byte[] ones;

/// <summary>
/// Returns the shared fake norms array, creating it with
/// <c>MaxDoc()</c> entries the first time it is needed.
/// </summary>
private byte[] FakeNorms()
{
    if (ones != null)
    {
        return ones;
    }

    ones = CreateFakeNorms(MaxDoc());
    return ones;
}
/// <summary>
/// Returns the norm bytes for <paramref name="field"/>, reading and caching
/// them on first access. Returns null when the field has no norms entry
/// (not indexed, or norms not stored).
/// </summary>
protected internal virtual byte[] GetNorms(System.String field)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];
        if (entry == null)
        {
            return null;
        }

        if (entry.bytes != null)
        {
            return entry.bytes;
        }

        // First access: read the norms, then cache them on the entry.
        byte[] loaded = new byte[MaxDoc()];
        Norms(field, loaded, 0);
        entry.bytes = loaded;
        return entry.bytes;
    }
}
/// <summary>
/// Returns the norm bytes for <paramref name="field"/>, or the fake norms
/// array when <c>GetNorms</c> returns null.
/// </summary>
public override byte[] Norms(System.String field)
{
    lock (this)
    {
        byte[] stored = GetNorms(field);
        return (stored != null) ? stored : FakeNorms();
    }
}
/// <summary>
/// Stores <paramref name="value_Renamed"/> as the norm of document
/// <paramref name="doc"/> for <paramref name="field"/> and marks the norms
/// dirty. Does nothing when the field has no norms entry.
/// </summary>
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
    Norm entry = (Norm) norms[field];
    if (entry != null)
    {
        entry.dirty = true;
        normsDirty = true;

        byte[] values = Norms(field);
        values[doc] = value_Renamed;
    }
}
/// <summary>
/// Copies <c>MaxDoc()</c> norm bytes for <paramref name="field"/> into
/// <paramref name="bytes"/> starting at <paramref name="offset"/>. Fake
/// norms are used when the field has no entry, the cached bytes when they
/// are loaded, and otherwise the bytes are read through a clone of the
/// field's norm input.
/// </summary>
public override void Norms(System.String field, byte[] bytes, int offset)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];

        // An in-memory source exists for fields without an entry (fake
        // norms) and for entries whose bytes are already cached.
        byte[] source = (entry == null) ? FakeNorms() : entry.bytes;
        if (source != null)
        {
            Array.Copy(source, 0, bytes, offset, MaxDoc());
            return;
        }

        // Read through a clone, which is closed again afterwards.
        IndexInput clonedInput = (IndexInput) entry.in_Renamed.Clone();
        try
        {
            clonedInput.Seek(0);
            clonedInput.ReadBytes(bytes, offset, MaxDoc());
        }
        finally
        {
            clonedInput.Close();
        }
    }
}
/// <summary>
/// Opens a norm input for every indexed field that does not omit norms and
/// registers it in the norms table under the field name.
/// </summary>
/// <param name="cfsDir">Directory used when no separate ".s" norm file exists.</param>
private void OpenNorms(Directory cfsDir)
{
    for (int index = 0; index < fieldInfos.Size(); index++)
    {
        FieldInfo info = fieldInfos.FieldInfo(index);
        if (!info.isIndexed || info.omitNorms)
        {
            continue;
        }

        // A separate ".s" file in the regular directory is tried first;
        // otherwise the ".f" file is opened from cfsDir.
        Directory source = Directory();
        string normName = segment + ".s" + info.number;
        if (!source.FileExists(normName))
        {
            normName = segment + ".f" + info.number;
            source = cfsDir;
        }

        IndexInput input = source.OpenInput(normName);
        norms[info.name] = new Norm(this, input, info.number);
    }
}
/// <summary>
/// Closes the input of every norms entry while holding the table's
/// SyncRoot.
/// </summary>
private void CloseNorms()
{
    lock (norms.SyncRoot)
    {
        foreach (Norm entry in norms.Values)
        {
            entry.in_Renamed.Close();
        }
    }
}
/// <summary>
/// Returns the term vectors reader stored in this reader's thread data
/// slot, cloning the original reader and storing the clone in the slot
/// when the slot is empty.
/// </summary>
/// <returns>The reader held in the current thread's data slot.</returns>
private TermVectorsReader GetTermVectorsReader()
{
    object cached = System.Threading.Thread.GetData(termVectorsLocal);
    if (cached != null)
    {
        return (TermVectorsReader) cached;
    }

    TermVectorsReader clone = (TermVectorsReader) termVectorsReaderOrig.Clone();
    System.Threading.Thread.SetData(termVectorsLocal, clone);
    return clone;
}
/// <summary>
/// Returns the term frequency vector of <paramref name="field"/> in
/// document <paramref name="docNumber"/>. Returns null when the field is
/// unknown, does not store term vectors, or no term vectors reader is
/// available.
/// </summary>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    FieldInfo info = fieldInfos.FieldInfo(field);
    bool vectorAvailable = info != null && info.storeTermVector && termVectorsReaderOrig != null;
    if (!vectorAvailable)
    {
        return null;
    }

    TermVectorsReader vectors = GetTermVectorsReader();
    if (vectors != null)
    {
        return vectors.Get(docNumber, field);
    }

    return null;
}
/// <summary>
/// Returns what the term vectors reader yields for document
/// <paramref name="docNumber"/>, or null when no term vectors reader is
/// available.
/// </summary>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (termVectorsReaderOrig != null)
    {
        TermVectorsReader vectors = GetTermVectorsReader();
        if (vectors != null)
        {
            return vectors.Get(docNumber);
        }
    }

    return null;
}
/// <summary>
/// Resolves the reader implementation type. The type name is read from the
/// "Lucene.Net.SegmentReader.class" application setting, defaulting to this
/// class; when reading the setting raises a SecurityException the default
/// type is used instead.
/// </summary>
/// <exception cref="System.SystemException">
/// The type could not be resolved; the original failure is available
/// through <c>InnerException</c>.
/// </exception>
static SegmentReader()
{
    try
    {
        System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
        IMPL = System.Type.GetType(name);
    }
    catch (System.Security.SecurityException)
    {
        // The setting could not be read; fall back to the default type.
        try
        {
            IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
        }
        catch (System.Exception e)
        {
            // Chain the cause so it is not reduced to message text.
            throw new System.SystemException("cannot load default SegmentReader class: " + e, e);
        }
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("cannot load SegmentReader class: " + e, e);
    }
}