Update the thread-local storage patch, to fix #335178
[beagle.git] / beagled / Lucene.Net / Index / SegmentReader.cs
blob11f5ed0d17436203222e32fabfe1bd3ebd5ea8d4
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Field = Lucene.Net.Documents.Field;
19 using Directory = Lucene.Net.Store.Directory;
20 using IndexInput = Lucene.Net.Store.IndexInput;
21 using IndexOutput = Lucene.Net.Store.IndexOutput;
22 using BitVector = Lucene.Net.Util.BitVector;
23 namespace Lucene.Net.Index
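/// <summary> An <code>IndexReader</code> over a single index segment. It opens the
/// segment's field infos, stored fields, term infos, frequency and position
/// streams, norms, deletions and (when the segment has them) term vectors,
/// reading from the segment's compound (.cfs) file when one exists.
/// </summary>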
29 /// <version> $Id: SegmentReader.cs,v 1.5 2006/04/03 17:12:17 joeshaw Exp $
30 /// </version>
31 class SegmentReader : IndexReader
// Segment name; every per-segment file name is built as segment + extension.
private string segment;

// Field metadata read from the segment's ".fnm" file.
internal FieldInfos fieldInfos;
// Reader for the stored fields of this segment.
private FieldsReader fieldsReader;

// Term infos reader for this segment.
internal TermInfosReader tis;
// Master term vectors reader; stays null unless the segment has vectors.
internal TermVectorsReader termVectorsReaderOrig;

// Clone of termVectorsReaderOrig handed out by GetTermVectorsReader().
// NOTE(review): ThreadStaticAttribute is documented for static fields only;
// on this instance field it presumably has no effect, so the clone would be
// shared by every thread using this reader - confirm.
[ThreadStatic]
private TermVectorsReader tvReader;

// Deleted-document bits; null means the segment has no deletions.
internal BitVector deletedDocs;
// Pending-change flags consumed and reset by DoCommit().
private bool deletedDocsDirty;
private bool normsDirty;
private bool undeleteAll;

// Streams kept open for the lifetime of the reader.
internal IndexInput freqStream;
internal IndexInput proxStream;

// Compound File Reader when based on a compound file segment
internal CompoundFileReader cfsReader;
/// <summary>Norm data of one field: the input the norms are read from, the
/// cached bytes (null until loaded), a dirty flag and the field number.</summary>
private class Norm
{
    private SegmentReader enclosingInstance;

    public IndexInput in_Renamed; // private -> public
    public byte[] bytes; // private -> public
    public bool dirty; // private -> public
    public int number; // private -> public

    /// <summary>Binds the norm to its owning reader, input and field number.</summary>
    public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
    {
        this.enclosingInstance = enclosingInstance;
        this.in_Renamed = in_Renamed;
        this.number = number;
    }

    /// <summary>The SegmentReader this norm belongs to.</summary>
    public SegmentReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Writes the cached bytes to a temporary file, renames it to the
    /// field's norm file and clears the dirty flag.</summary>
    public void ReWrite() // private -> public
    {
        SegmentReader owner = Enclosing_Instance;
        string tmpName = owner.segment + ".tmp";

        // NOTE: norms are re-written in regular directory, not cfs
        IndexOutput tmpOut = owner.Directory().CreateOutput(tmpName);
        try
        {
            tmpOut.WriteBytes(bytes, owner.MaxDoc());
        }
        finally
        {
            tmpOut.Close();
        }

        // use a different file name if we have compound format
        string suffix = (owner.cfsReader == null) ? ".f" : ".s";
        owner.Directory().RenameFile(tmpName, owner.segment + suffix + number);
        dirty = false;
    }
}
// Norm entries keyed by field name, kept in a synchronized Hashtable.
private System.Collections.Hashtable norms =
    System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

/// <summary>The type that Get() instantiates; assigned by the static constructor.</summary>
private static Type IMPL;
/// <summary>Creates an uninitialized reader, passing null to the base
/// constructor. Get() instantiates the reader through Activator and then
/// calls Init and Initialize on it.</summary>
public SegmentReader()
    : base(null)
{
}
/// <summary>Opens a reader on <c>si</c> in the segment's own directory, with
/// no SegmentInfos and with both closeDir and ownDir false.</summary>
public static SegmentReader Get(SegmentInfo si)
{
    const bool closeDir = false;
    const bool ownDir = false;
    return Get(si.dir, si, null, closeDir, ownDir);
}
/// <summary>Opens a reader on <c>si</c> in the segment's own directory,
/// forwarding <c>sis</c> and <c>closeDir</c> and passing ownDir as true.</summary>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    const bool ownDir = true;
    return Get(si.dir, si, sis, closeDir, ownDir);
}
/// <summary>Creates an instance of the configured implementation type (IMPL),
/// then initializes it for the given directory and segment.</summary>
/// <param name="dir">Directory passed to Init.</param>
/// <param name="si">Segment to open; passed to Initialize.</param>
/// <param name="sis">SegmentInfos passed to Init (may be null).</param>
/// <param name="closeDir">Passed through to Init.</param>
/// <param name="ownDir">Passed through to Init.</param>
/// <returns>The initialized reader.</returns>
/// <exception cref="System.SystemException">The implementation type could not
/// be instantiated; the original failure is available as InnerException.</exception>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
    SegmentReader instance;
    try
    {
        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        // Keep the historical message text, but also chain the original
        // exception so callers can inspect it programmatically.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }
    instance.Init(dir, sis, closeDir, ownDir);
    instance.Initialize(si);
    return instance;
}
/// <summary>Opens every per-segment resource for <c>si</c>: field infos, stored
/// fields, term infos, deletions, freq/prox streams, norms and, when the
/// segment has vectors, the term vectors reader.</summary>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = Directory();
    string compoundName = segment + ".cfs";
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        cfsDir = cfsReader;
    }

    // cfsDir is the compound file when one exists, else the regular directory
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    // open term vector files only as needed
    if (fieldInfos.HasVectors())
    {
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
/// <summary>Flushes pending changes: rewrites or removes the ".del" file,
/// rewrites dirty norms, then clears all dirty flags.</summary>
protected internal override void DoCommit()
{
    string tmpName = segment + ".tmp";
    string delName = segment + ".del";

    if (deletedDocsDirty)
    {
        // re-write deleted
        deletedDocs.Write(Directory(), tmpName);
        Directory().RenameFile(tmpName, delName);
    }

    if (undeleteAll && Directory().FileExists(delName))
    {
        Directory().DeleteFile(delName);
    }

    if (normsDirty)
    {
        // re-write norms
        foreach (Norm norm in norms.Values)
        {
            if (norm.dirty)
            {
                norm.ReWrite();
            }
        }
    }

    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
}
/// <summary>Closes the stored fields and term infos readers, then the
/// freq/prox streams, the norms, the master term vectors reader and the
/// compound file reader, skipping those that are null.</summary>
protected internal override void DoClose()
{
    fieldsReader.Close();
    tis.Close();

    if (freqStream != null)
    {
        freqStream.Close();
    }
    if (proxStream != null)
    {
        proxStream.Close();
    }

    CloseNorms();

    if (termVectorsReaderOrig != null)
    {
        termVectorsReaderOrig.Close();
    }

    if (cfsReader != null)
    {
        cfsReader.Close();
    }
}
/// <summary>True when the segment's directory contains its ".del" file.</summary>
internal static bool HasDeletions(SegmentInfo si)
{
    string delName = si.name + ".del";
    return si.dir.FileExists(delName);
}
/// <summary>True when this reader currently holds a deleted-documents bit vector.</summary>
public override bool HasDeletions()
{
    bool noDeletions = (deletedDocs == null);
    return !noDeletions;
}
/// <summary>True when the segment's directory contains its ".cfs" file.</summary>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    string compoundName = si.name + ".cfs";
    return si.dir.FileExists(compoundName);
}
/// <summary>True when the segment's directory lists at least one file named
/// <c>&lt;segment&gt;.f</c> immediately followed by a digit.</summary>
/// <param name="si">Segment whose directory listing is scanned.</param>
/// <returns>true if such a file exists; otherwise false.</returns>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    string[] fileNames = si.dir.List();
    if (fileNames == null)
    {
        // Defensive: treat a missing listing as "no files".
        return false;
    }

    string pattern = si.name + ".f";
    int patternLength = pattern.Length;
    for (int i = 0; i < fileNames.Length; i++)
    {
        string candidate = fileNames[i];

        // The name must be strictly longer than the pattern; otherwise
        // indexing candidate[patternLength] would run past the end of a file
        // named exactly "<segment>.f".
        if (candidate.Length > patternLength
            && String.CompareOrdinal(candidate, 0, pattern, 0, patternLength) == 0
            && Char.IsDigit(candidate[patternLength]))
        {
            return true;
        }
    }
    return false;
}
/// <summary>Marks <c>docNum</c> as deleted, creating the bit vector on first
/// use, and records that deletions must be written on commit.</summary>
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
    {
        // First deletion for this reader: allocate one bit per document.
        deletedDocs = new BitVector(MaxDoc());
    }

    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.Set(docNum);
}
/// <summary>Discards the in-memory deletions and flags the ".del" file for
/// removal on the next commit.</summary>
protected internal override void DoUndeleteAll()
{
    // Nothing left to write: the bit vector is dropped, not rewritten.
    deletedDocs = null;
    deletedDocsDirty = false;
    undeleteAll = true;
}
/// <summary>Lists the segment files that exist in the directory: the
/// fixed-extension files first, then one norm file per indexed field.</summary>
/// <returns>A synchronized ArrayList of file names.</returns>
internal virtual System.Collections.ArrayList Files()
{
    System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));

    string[] ext = new string[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};
    foreach (string extension in ext)
    {
        string fixedName = segment + "." + extension;
        if (Directory().FileExists(fixedName))
        {
            files.Add(fixedName);
        }
    }

    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (!fi.isIndexed)
        {
            continue;
        }

        // ".f<i>" without a compound file, ".s<i>" with one
        string normName = segment + (cfsReader == null ? ".f" : ".s") + i;
        if (Directory().FileExists(normName))
        {
            files.Add(normName);
        }
    }

    return files;
}
/// <summary>Returns the term enumeration supplied by the segment's term
/// infos reader.</summary>
public override TermEnum Terms()
{
    TermEnum allTerms = tis.Terms();
    return allTerms;
}
/// <summary>Returns the term enumeration the segment's term infos reader
/// produces for <c>t</c>.</summary>
public override TermEnum Terms(Term t)
{
    TermEnum termsFrom = tis.Terms(t);
    return termsFrom;
}
/// <summary>Returns the stored fields of document <c>n</c>.</summary>
/// <exception cref="System.ArgumentException">Document <c>n</c> is deleted.</exception>
public override Document Document(int n)
{
    lock (this)
    {
        bool deleted = IsDeleted(n);
        if (deleted)
        {
            throw new System.ArgumentException("attempt to access a deleted document");
        }
        return fieldsReader.Doc(n);
    }
}
/// <summary>True when a deletions bit vector exists and has bit <c>n</c> set.</summary>
public override bool IsDeleted(int n)
{
    lock (this)
    {
        if (deletedDocs == null)
        {
            return false;
        }
        return deletedDocs.Get(n);
    }
}
/// <summary>Creates a new SegmentTermDocs bound to this reader.</summary>
public override TermDocs TermDocs()
{
    SegmentTermDocs docs = new SegmentTermDocs(this);
    return docs;
}
/// <summary>Creates a new SegmentTermPositions bound to this reader.</summary>
public override TermPositions TermPositions()
{
    SegmentTermPositions positions = new SegmentTermPositions(this);
    return positions;
}
/// <summary>Returns the docFreq recorded for <c>t</c>, or 0 when the term
/// infos reader has no entry for it.</summary>
public override int DocFreq(Term t)
{
    TermInfo info = tis.Get(t);
    return (info == null) ? 0 : info.docFreq;
}
/// <summary>Returns MaxDoc() minus the number of set bits in the deletions
/// vector, or MaxDoc() when there are no deletions.</summary>
public override int NumDocs()
{
    int total = MaxDoc();
    return (deletedDocs != null) ? total - deletedDocs.Count() : total;
}
/// <summary>Returns the size reported by the stored fields reader.</summary>
public override int MaxDoc()
{
    int size = fieldsReader.Size();
    return size;
}
/// <summary>Returns the names of all fields in this segment, as a Hashtable
/// whose keys and values are both the field name.</summary>
/// <deprecated> Replaced by <c>GetFieldNames(IndexReader.FieldOption fldOption)</c>
/// </deprecated>
public override System.Collections.ICollection GetFieldNames()
{
    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        string fieldName = fieldInfos.FieldInfo(i).name;
        fieldSet.Add(fieldName, fieldName);
    }
    return fieldSet;
}
/// <summary>Returns the names of the fields whose isIndexed flag equals
/// <c>indexed</c>, as a Hashtable whose keys and values are both the field
/// name.</summary>
/// <deprecated> Replaced by <c>GetFieldNames(IndexReader.FieldOption fldOption)</c>
/// </deprecated>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed != indexed)
        {
            continue;
        }
        fieldSet.Add(fi.name, fi.name);
    }
    return fieldSet;
}
/// <summary>Returns the names of the indexed fields whose three term vector
/// flags (stored, with positions, with offsets) exactly match
/// <c>tvSpec</c>.</summary>
/// <deprecated> Replaced by <c>GetFieldNames(IndexReader.FieldOption fldOption)</c>
/// </deprecated>
/// <exception cref="System.ArgumentException"><c>tvSpec</c> is not one of the
/// five known Field.TermVector values.</exception>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    bool isNo = tvSpec == Field.TermVector.NO;
    bool isYes = tvSpec == Field.TermVector.YES;
    bool isPositions = tvSpec == Field.TermVector.WITH_POSITIONS;
    bool isOffsets = tvSpec == Field.TermVector.WITH_OFFSETS;
    bool isPositionsOffsets = tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS;

    if (!(isNo || isYes || isPositions || isOffsets || isPositionsOffsets))
    {
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
    }

    // Translate the spec into the three flags stored on each FieldInfo.
    bool storedTermVector = !isNo;
    bool storePositionWithTermVector = isPositions || isPositionsOffsets;
    bool storeOffsetWithTermVector = isOffsets || isPositionsOffsets;

    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        bool flagsMatch = fi.storeTermVector == storedTermVector
            && fi.storePositionWithTermVector == storePositionWithTermVector
            && fi.storeOffsetWithTermVector == storeOffsetWithTermVector;
        if (fi.isIndexed && flagsMatch)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    return fieldSet;
}
/// <summary>Returns the names of the fields selected by <c>fieldOption</c>, as
/// a Hashtable whose keys and values are both the field name.</summary>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
{
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        bool indexed = fi.isIndexed;
        bool vector = fi.storeTermVector;
        bool positions = fi.storePositionWithTermVector;
        bool offsets = fi.storeOffsetWithTermVector;

        // Each option is paired with the field-flag test it requires; the
        // field is included when the requested option's test holds.
        bool selected =
            fieldOption == IndexReader.FieldOption.ALL
            || (fieldOption == IndexReader.FieldOption.UNINDEXED && !indexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED && indexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR && indexed && !vector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR && vector && !positions && !offsets)
            || (fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR && indexed && vector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION && positions && !offsets)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET && offsets && !positions)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && offsets && positions);

        if (selected)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    return fieldSet;
}
/// <summary>Returns the norm bytes of <c>field</c>, reading and caching them on
/// first access. Returns null when no norms are registered for the field.</summary>
public override byte[] Norms(System.String field)
{
    lock (this)
    {
        Norm norm = (Norm) norms[field];
        if (norm == null)
        {
            // not an indexed Field
            return null;
        }

        if (norm.bytes == null)
        {
            // value not yet read
            byte[] loaded = new byte[MaxDoc()];
            Norms(field, loaded, 0);
            norm.bytes = loaded; // cache it
        }
        return norm.bytes;
    }
}
/// <summary>Sets the norm of document <c>doc</c> in <c>field</c> and marks the
/// field's norms dirty. Does nothing when the field has no norms.</summary>
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
    Norm norm = (Norm) norms[field];
    if (norm != null)
    {
        norm.dirty = true; // mark it dirty
        normsDirty = true;

        // Norms(field) loads and caches the bytes if needed; write into that array.
        byte[] cached = Norms(field);
        cached[doc] = value_Renamed;
    }
    // else: not an indexed Field
}
/// <summary>Read norms into a pre-allocated array, starting at <c>offset</c>.
/// The array is left untouched when the field has no norms.</summary>
public override void Norms(System.String field, byte[] bytes, int offset)
{
    lock (this)
    {
        Norm norm = (Norm) norms[field];
        if (norm == null)
        {
            return; // use zeros in array
        }

        byte[] cached = norm.bytes;
        if (cached != null)
        {
            // can copy from cache
            Array.Copy(cached, 0, bytes, offset, MaxDoc());
            return;
        }

        // read from disk through a clone, so the shared input is not moved
        IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
        try
        {
            normStream.Seek(0);
            normStream.ReadBytes(bytes, offset, MaxDoc());
        }
        finally
        {
            normStream.Close();
        }
    }
}
/// <summary>Opens the norm input of every indexed field and registers it in
/// <c>norms</c> under the field name.</summary>
/// <param name="cfsDir">Directory used for ".f" norm files when no separate
/// ".s" file exists in the regular directory.</param>
private void OpenNorms(Directory cfsDir)
{
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (!fi.isIndexed)
        {
            continue;
        }

        // look first if there are separate norms in compound format
        string fileName = segment + ".s" + fi.number;
        Directory source = Directory();
        bool hasSeparate = source.FileExists(fileName);
        if (!hasSeparate)
        {
            fileName = segment + ".f" + fi.number;
            source = cfsDir;
        }

        norms[fi.name] = new Norm(this, source.OpenInput(fileName), fi.number);
    }
}
/// <summary>Closes the input of every registered norm while holding the
/// norms table's SyncRoot.</summary>
private void CloseNorms()
{
    lock (norms.SyncRoot)
    {
        foreach (Norm norm in norms.Values)
        {
            norm.in_Renamed.Close();
        }
    }
}
// Per-reader data slot; the runtime keeps a separate value in it for each
// thread. The [ThreadStatic] attribute on the instance field tvReader cannot
// do this, because ThreadStaticAttribute only applies to static fields.
// NOTE(review): the change description for this file mentions a thread-local
// storage patch (#335178) - confirm data slots are acceptable on the target
// runtime before merging.
private System.LocalDataStoreSlot tvReaderSlot = System.Threading.Thread.AllocateDataSlot();

/// <summary> Returns the calling thread's clone of the master term vectors
/// reader, creating the clone on the thread's first call.</summary>
/// <returns> The TermVectorsReader clone for the current thread.
/// </returns>
private TermVectorsReader GetTermVectorsReader()
{
    TermVectorsReader threadReader = (TermVectorsReader) System.Threading.Thread.GetData(tvReaderSlot);
    if (threadReader == null)
    {
        // First use on this thread: clone the master reader and remember it.
        threadReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
        System.Threading.Thread.SetData(tvReaderSlot, threadReader);
    }
    return threadReader;
}
/// <summary>Returns the term frequency vector of <c>field</c> in document
/// <c>docNumber</c>. Returns null when the field is unknown, does not store
/// term vectors, or the segment has no term vectors reader.</summary>
/// <throws> IOException </throws>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Check if this Field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.FieldInfo(field);
    bool hasVector = fi != null && fi.storeTermVector && termVectorsReaderOrig != null;
    if (!hasVector)
    {
        return null;
    }

    TermVectorsReader termVectorsReader = GetTermVectorsReader();
    return (termVectorsReader == null) ? null : termVectorsReader.Get(docNumber, field);
}
/// <summary>Returns the term frequency vectors of document <c>docNumber</c> as
/// produced by the term vectors reader. Returns null when the segment has no
/// term vectors reader.</summary>
/// <throws> IOException </throws>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (termVectorsReaderOrig != null)
    {
        TermVectorsReader termVectorsReader = GetTermVectorsReader();
        if (termVectorsReader != null)
        {
            return termVectorsReader.Get(docNumber);
        }
    }
    return null;
}
/// <summary>Resolves the implementation type that Get() instantiates. The type
/// name comes from the "Lucene.Net.SegmentReader.class" application setting
/// and defaults to SegmentReader itself.</summary>
static SegmentReader()
{
    try
    {
        System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
        // Type.GetType returns null for an unknown name; in that case the
        // failure surfaces later, when Get() tries to instantiate IMPL.
        IMPL = System.Type.GetType(name);
    }
    catch (System.Exception e)
    {
        // Keep the historical message text, but also chain the original
        // exception so it is available as InnerException.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }
}