Back out the thread local storage workaround for a Mono bug since it was
[beagle.git] / beagled / Lucene.Net / Index / SegmentReader.cs
blob2da963550b7dd0b76b33096dd3eab648b4fe061b
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Field = Lucene.Net.Documents.Field;
19 using Directory = Lucene.Net.Store.Directory;
20 using IndexInput = Lucene.Net.Store.IndexInput;
21 using IndexOutput = Lucene.Net.Store.IndexOutput;
22 using BitVector = Lucene.Net.Util.BitVector;
23 namespace Lucene.Net.Index
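26 /// <summary> An <code>IndexReader</code> over a single index segment. It opens the segment's
27 /// field infos, stored fields, term infos, frequency/position streams, norms and deletions.
28 /// </summary>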
29 /// <version> $Id: SegmentReader.cs,v 1.6 2006/04/26 19:19:53 joeshaw Exp $
30 /// </version>
31 class SegmentReader : IndexReader
/// <summary>Segment name; every file this reader touches is named segment + "." + extension.</summary>
private string segment;

/// <summary>Field metadata loaded from the segment's ".fnm" file.</summary>
internal FieldInfos fieldInfos;
/// <summary>Reader for the stored fields of the segment's documents.</summary>
private FieldsReader fieldsReader;

/// <summary>Reader for the segment's term infos.</summary>
internal TermInfosReader tis;
/// <summary>Master term vectors reader that per-thread clones are made from;
/// stays null when no field of the segment stores term vectors.</summary>
internal TermVectorsReader termVectorsReaderOrig;
/// <summary>Thread data slot that holds each thread's clone of the term vectors reader.</summary>
internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();

/// <summary>Bits of the deleted documents; null while the segment has no deletions.</summary>
internal BitVector deletedDocs;
/// <summary>True when <c>deletedDocs</c> has changes that DoCommit must write.</summary>
private bool deletedDocsDirty;
/// <summary>True when at least one norm has changes that DoCommit must write.</summary>
private bool normsDirty;
/// <summary>True when DoCommit must remove the segment's ".del" file.</summary>
private bool undeleteAll;

/// <summary>Open input on the segment's ".frq" file.</summary>
internal IndexInput freqStream;
/// <summary>Open input on the segment's ".prx" file.</summary>
internal IndexInput proxStream;

// Compound File Reader when based on a compound file segment
internal CompoundFileReader cfsReader;
/// <summary>Norms of one indexed field: the input they are read from, the bytes once
/// they have been loaded, and a flag telling whether they were modified and still
/// have to be written back.</summary>
private class Norm
{
    private SegmentReader enclosingInstance;

    public IndexInput in_Renamed; // private -> public
    public byte[] bytes; // private -> public
    public bool dirty; // private -> public
    public int number; // private -> public

    /// <summary>Creates the norm entry for field number <paramref name="number"/>.</summary>
    /// <param name="enclosingInstance">reader that owns this entry</param>
    /// <param name="in_Renamed">input the norm bytes are read from</param>
    /// <param name="number">number of the field, used to build the norm file name</param>
    public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
    {
        this.enclosingInstance = enclosingInstance;
        this.in_Renamed = in_Renamed;
        this.number = number;
    }

    /// <summary>The reader that owns this entry.</summary>
    public SegmentReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>Writes the cached bytes to a temporary file, renames it to the field's
    /// norm file and clears the dirty flag.</summary>
    public void ReWrite() // private -> public
    {
        SegmentReader owner = Enclosing_Instance;
        string tmpName = owner.segment + ".tmp";

        // NOTE: norms are re-written in regular directory, not cfs
        IndexOutput tmpOutput = owner.Directory().CreateOutput(tmpName);
        try
        {
            tmpOutput.WriteBytes(bytes, owner.MaxDoc());
        }
        finally
        {
            tmpOutput.Close();
        }

        // use a different file name if we have compound format
        string extension = (owner.cfsReader == null) ? ".f" : ".s";
        string fileName = owner.segment + extension + number;

        owner.Directory().RenameFile(tmpName, fileName);
        this.dirty = false;
    }
}
/// <summary>Norm entries of the indexed fields, keyed by field name. The table is
/// wrapped with <c>Hashtable.Synchronized</c>.</summary>
private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

/// <summary>The type that <c>Get</c> instantiates; assigned by the static constructor.</summary>
private static System.Type IMPL;
/// <summary>Creates an uninitialized reader, passing null to the base constructor.
/// <c>Get</c> creates instances through this constructor and then initializes them.</summary>
public SegmentReader() : base(null)
{
}
/// <summary>Opens a reader on <paramref name="si"/> using the segment's own directory,
/// no SegmentInfos, and both closeDir and ownDir set to false.</summary>
/// <param name="si">segment to open</param>
/// <returns>the initialized reader</returns>
public static SegmentReader Get(SegmentInfo si)
{
    Directory segmentDir = si.dir;
    return Get(segmentDir, si, null, false, false);
}
/// <summary>Opens a reader on <paramref name="si"/> using the segment's own directory,
/// with ownDir set to true.</summary>
/// <param name="sis">segment infos handed on to the reader</param>
/// <param name="si">segment to open</param>
/// <param name="closeDir">closeDir flag handed on to the reader</param>
/// <returns>the initialized reader</returns>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    Directory segmentDir = si.dir;
    return Get(segmentDir, si, sis, closeDir, true);
}
/// <summary>Creates an instance of the configured implementation type (<c>IMPL</c>),
/// initializes its base state and opens the files of segment <paramref name="si"/>.</summary>
/// <param name="dir">directory handed to <c>Init</c></param>
/// <param name="si">segment to open</param>
/// <param name="sis">segment infos handed to <c>Init</c>; may be null</param>
/// <param name="closeDir">closeDir flag handed to <c>Init</c></param>
/// <param name="ownDir">ownDir flag handed to <c>Init</c></param>
/// <returns>the initialized reader</returns>
/// <exception cref="System.SystemException">the implementation type could not be
/// instantiated; the original failure is available as InnerException</exception>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
    SegmentReader instance;
    try
    {
        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        // Message text is unchanged; the cause is additionally chained so callers
        // and logs keep the original exception object and its stack trace.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }

    instance.Init(dir, sis, closeDir, ownDir);
    instance.Initialize(si);
    return instance;
}
/// <summary>Opens all files of segment <paramref name="si"/>. Files other than the
/// deletions are read through the compound file when "segment.cfs" exists.</summary>
/// <param name="si">segment whose name selects the files to open</param>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Start from the regular directory; switch to the compound file
    // directory for some files, if it exists.
    Directory cfsDir = Directory();
    string compoundName = segment + ".cfs";
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        cfsDir = cfsReader;
    }

    // cfsDir is still the regular directory here when no compound file exists
    // (multi-file format).
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    // open term vector files only as needed
    if (fieldInfos.HasVectors())
    {
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
/// <summary>Finalizer: clears the term vectors thread data slot.</summary>
/// <remarks>
/// Carried over from the Java original as a patch for pre-1.4.2 JVMs, whose
/// ThreadLocals leak. {{Aroush-1.9}} questioned whether it is required for .NET.
/// NOTE(review): Thread.SetData acts on the currently running thread, so this only
/// clears the slot of the thread that runs the finalizer - confirm it is still wanted.
/// </remarks>
~SegmentReader()
{
    try
    {
        System.Threading.Thread.SetData(termVectorsLocal, null);
    }
    catch (Exception)
    {
        // Deliberately ignored: this is best-effort cleanup and an exception
        // must not escape from a finalizer.
    }
}
/// <summary>Writes pending changes to the directory: the deletions bit vector, the
/// removal of the ".del" file after an undelete-all, and every dirty norm. All
/// pending-change flags are cleared afterwards.</summary>
protected internal override void DoCommit()
{
    string deletionsName = segment + ".del";

    if (deletedDocsDirty)
    {
        // re-write deleted: write to a temporary file, then rename over ".del"
        string tmpName = segment + ".tmp";
        deletedDocs.Write(Directory(), tmpName);
        Directory().RenameFile(tmpName, deletionsName);
    }

    if (undeleteAll && Directory().FileExists(deletionsName))
    {
        Directory().DeleteFile(deletionsName);
    }

    if (normsDirty)
    {
        // re-write norms
        foreach (Norm norm in norms.Values)
        {
            if (norm.dirty)
            {
                norm.ReWrite();
            }
        }
    }

    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
}
/// <summary>Closes everything the reader holds open: stored fields, term infos,
/// frequency and position streams, norms, the term vectors reader and finally the
/// compound file reader. The last four are skipped when null.</summary>
protected internal override void DoClose()
{
    fieldsReader.Close();
    tis.Close();

    if (freqStream != null)
    {
        freqStream.Close();
    }
    if (proxStream != null)
    {
        proxStream.Close();
    }

    CloseNorms();

    if (termVectorsReaderOrig != null)
    {
        termVectorsReaderOrig.Close();
    }

    // closed last, after the readers and streams above
    if (cfsReader != null)
    {
        cfsReader.Close();
    }
}
/// <summary>Tells whether a deletions file (name + ".del") exists for the segment.</summary>
/// <param name="si">segment to check</param>
/// <returns>true when the ".del" file exists in the segment's directory</returns>
internal static bool HasDeletions(SegmentInfo si)
{
    string deletionsName = si.name + ".del";
    return si.dir.FileExists(deletionsName);
}
/// <summary>Tells whether this reader currently holds a deletions bit vector.</summary>
/// <returns>true when <c>deletedDocs</c> is not null</returns>
public override bool HasDeletions()
{
    bool haveBits = deletedDocs != null;
    return haveBits;
}
/// <summary>Tells whether a compound file (name + ".cfs") exists for the segment.</summary>
/// <param name="si">segment to check</param>
/// <returns>true when the ".cfs" file exists in the segment's directory</returns>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    string compoundName = si.name + ".cfs";
    return si.dir.FileExists(compoundName);
}
/// <summary>Tells whether the segment's directory lists a file named
/// name + ".f" followed by a digit.</summary>
/// <param name="si">segment to check</param>
/// <returns>true when at least one such file is listed</returns>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    string[] fileNames = si.dir.List();
    string pattern = si.name + ".f";
    int patternLength = pattern.Length;

    for (int i = 0; i < fileNames.Length; i++)
    {
        string candidate = fileNames[i];

        // A name no longer than the pattern (e.g. exactly "<segment>.f") has no
        // character after the pattern; indexing it would throw IndexOutOfRangeException.
        if (candidate.Length <= patternLength)
        {
            continue;
        }

        if (candidate.StartsWith(pattern) && System.Char.IsDigit(candidate[patternLength]))
        {
            return true;
        }
    }
    return false;
}
/// <summary>Marks document <paramref name="docNum"/> as deleted, creating the
/// deletions bit vector on first use, and flags the deletions as dirty.</summary>
/// <param name="docNum">number of the document to delete</param>
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
    {
        // first deletion in this segment: one bit per document
        deletedDocs = new BitVector(MaxDoc());
    }

    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.Set(docNum);
}
/// <summary>Drops the in-memory deletions and records that the next DoCommit has to
/// remove the ".del" file.</summary>
protected internal override void DoUndeleteAll()
{
    deletedDocs = null;
    // nothing left to write; the commit only has to delete the file
    deletedDocsDirty = false;
    undeleteAll = true;
}
/// <summary>Lists the files of this segment that exist in the directory: those with
/// one of the known extensions plus the norm file of every indexed field.</summary>
/// <returns>a synchronized ArrayList of file names</returns>
internal virtual System.Collections.ArrayList Files()
{
    System.Collections.ArrayList found = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
    string[] extensions = new string[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp" };

    foreach (string extension in extensions)
    {
        string candidate = segment + "." + extension;
        if (Directory().FileExists(candidate))
        {
            found.Add(candidate);
        }
    }

    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        if (!fieldInfos.FieldInfo(fieldNumber).isIndexed)
        {
            continue;
        }

        // norm files are named ".f<n>" without and ".s<n>" with a compound file
        string normExtension = (cfsReader == null) ? ".f" : ".s";
        string normFile = segment + normExtension + fieldNumber;
        if (Directory().FileExists(normFile))
        {
            found.Add(normFile);
        }
    }

    return found;
}
/// <summary>Returns the term enumeration obtained from the term infos reader.</summary>
public override TermEnum Terms()
{
    TermEnum allTerms = tis.Terms();
    return allTerms;
}
/// <summary>Returns the term enumeration the term infos reader produces for
/// <paramref name="t"/>.</summary>
/// <param name="t">term handed to the term infos reader</param>
public override TermEnum Terms(Term t)
{
    TermEnum termsFrom = tis.Terms(t);
    return termsFrom;
}
/// <summary>Returns the stored fields of document <paramref name="n"/>.</summary>
/// <param name="n">document number</param>
/// <exception cref="System.ArgumentException">the document is marked as deleted</exception>
public override Document Document(int n)
{
    lock (this)
    {
        if (!IsDeleted(n))
        {
            return fieldsReader.Doc(n);
        }
        throw new System.ArgumentException("attempt to access a deleted document");
    }
}
/// <summary>Tells whether document <paramref name="n"/> is marked as deleted.</summary>
/// <param name="n">document number</param>
/// <returns>false when there is no deletions bit vector, otherwise the document's bit</returns>
public override bool IsDeleted(int n)
{
    lock (this)
    {
        if (deletedDocs == null)
        {
            return false;
        }
        return deletedDocs.Get(n);
    }
}
/// <summary>Creates a new <c>SegmentTermDocs</c> bound to this reader.</summary>
public override TermDocs TermDocs()
{
    return new SegmentTermDocs(this);
}
/// <summary>Creates a new <c>SegmentTermPositions</c> bound to this reader.</summary>
public override TermPositions TermPositions()
{
    return new SegmentTermPositions(this);
}
/// <summary>Returns the document frequency recorded for term <paramref name="t"/>.</summary>
/// <param name="t">term to look up</param>
/// <returns>the term's docFreq, or 0 when the term infos reader has no entry for it</returns>
public override int DocFreq(Term t)
{
    TermInfo info = tis.Get(t);
    return (info == null) ? 0 : info.docFreq;
}
/// <summary>Returns MaxDoc() minus the number of bits set in the deletions bit
/// vector, if there is one.</summary>
public override int NumDocs()
{
    if (deletedDocs == null)
    {
        return MaxDoc();
    }
    return MaxDoc() - deletedDocs.Count();
}
/// <summary>Returns the size reported by the stored fields reader.</summary>
public override int MaxDoc()
{
    int size = fieldsReader.Size();
    return size;
}
/// <summary>Collects the names of all fields of this segment.
/// Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</summary>
/// <returns>a Hashtable in which every field name is stored as both key and value</returns>
public override System.Collections.ICollection GetFieldNames()
{
    // maintain a unique set of Field names
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    int i = 0;
    while (i < fieldInfos.Size())
    {
        string name = fieldInfos.FieldInfo(i).name;
        names.Add(name, name);
        i++;
    }
    return names;
}
/// <summary>Collects the names of the fields whose isIndexed flag equals
/// <paramref name="indexed"/>.
/// Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</summary>
/// <param name="indexed">true selects indexed fields, false selects unindexed ones</param>
/// <returns>a Hashtable in which every selected field name is stored as both key and value</returns>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    // maintain a unique set of Field names
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(i);
        if (info.isIndexed != indexed)
        {
            continue;
        }
        names.Add(info.name, info.name);
    }
    return names;
}
/// <summary>Collects the names of the indexed fields whose term vector settings match
/// <paramref name="tvSpec"/> exactly.
/// Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</summary>
/// <param name="tvSpec">one of the Field.TermVector values</param>
/// <returns>a Hashtable in which every selected field name is stored as both key and value</returns>
/// <exception cref="System.ArgumentException"><paramref name="tvSpec"/> is none of the
/// known Field.TermVector values</exception>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    // Derive the three flags a field has to carry for the requested spec.
    bool wantPositions = tvSpec == Field.TermVector.WITH_POSITIONS || tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS;
    bool wantOffsets = tvSpec == Field.TermVector.WITH_OFFSETS || tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS;
    bool wantVector = wantPositions || wantOffsets || tvSpec == Field.TermVector.YES;

    // Anything that is neither a vector-storing spec nor NO is not a known value.
    if (!wantVector && tvSpec != Field.TermVector.NO)
    {
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
    }

    // maintain a unique set of Field names
    System.Collections.Hashtable names = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(i);
        bool matches = info.isIndexed
            && info.storeTermVector == wantVector
            && info.storePositionWithTermVector == wantPositions
            && info.storeOffsetWithTermVector == wantOffsets;
        if (matches)
        {
            names.Add(info.name, info.name);
        }
    }
    return names;
}
/// <summary>Collects the names of the fields selected by <paramref name="fieldOption"/>.</summary>
/// <param name="fieldOption">which fields to select, judged by each field's indexed
/// and term vector flags</param>
/// <returns>a Hashtable in which every selected field name is stored as both key and value</returns>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
{
    System.Collections.Hashtable names = new System.Collections.Hashtable();

    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(i);
        bool positions = info.storePositionWithTermVector;
        bool offsets = info.storeOffsetWithTermVector;

        // One clause per option; only the clause of the requested option can be true.
        bool selected =
            fieldOption == IndexReader.FieldOption.ALL
            || (fieldOption == IndexReader.FieldOption.UNINDEXED && !info.isIndexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED && info.isIndexed)
            || (fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR && info.isIndexed && !info.storeTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR && info.storeTermVector && !positions && !offsets)
            || (fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR && info.isIndexed && info.storeTermVector)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION && positions && !offsets)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET && offsets && !positions)
            || (fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && offsets && positions);

        if (selected)
        {
            names.Add(info.name, info.name);
        }
    }
    return names;
}
/// <summary>Returns the norm bytes of <paramref name="field"/>, reading them on the
/// first request and caching them in the field's norm entry.</summary>
/// <param name="field">field name</param>
/// <returns>the cached byte array, or null when there is no norm entry for the field
/// (not an indexed field)</returns>
public override byte[] Norms(System.String field)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];
        if (entry == null)
        {
            // not an indexed Field
            return null;
        }
        if (entry.bytes != null)
        {
            return entry.bytes;
        }

        // value not yet read
        byte[] loaded = new byte[MaxDoc()];
        Norms(field, loaded, 0);
        entry.bytes = loaded; // cache it
        return loaded;
    }
}
/// <summary>Stores a new norm value for one document of <paramref name="field"/> in
/// the cached bytes and marks the norm as dirty. Does nothing when the field has no
/// norm entry.</summary>
/// <param name="doc">document number</param>
/// <param name="field">field name</param>
/// <param name="value_Renamed">norm byte to store</param>
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
    Norm entry = (Norm) norms[field];
    if (entry != null)
    {
        entry.dirty = true; // mark it dirty
        normsDirty = true;

        byte[] cached = Norms(field);
        cached[doc] = value_Renamed; // set the value
    }
    // else: not an indexed Field
}
/// <summary>Read norms into a pre-allocated array.</summary>
/// <param name="field">field name</param>
/// <param name="bytes">destination array; left untouched when the field has no norm entry</param>
/// <param name="offset">index in <paramref name="bytes"/> of the first norm byte</param>
public override void Norms(System.String field, byte[] bytes, int offset)
{
    lock (this)
    {
        Norm entry = (Norm) norms[field];
        if (entry == null)
        {
            return; // use zeros in array
        }

        if (entry.bytes != null)
        {
            // can copy from cache
            System.Array.Copy(entry.bytes, 0, bytes, offset, MaxDoc());
            return;
        }

        // read from disk, through a clone of the entry's input
        IndexInput clonedInput = (IndexInput) entry.in_Renamed.Clone();
        try
        {
            clonedInput.Seek(0);
            clonedInput.ReadBytes(bytes, offset, MaxDoc());
        }
        finally
        {
            clonedInput.Close();
        }
    }
}
/// <summary>Opens the norm input of every indexed field and registers it in
/// <c>norms</c> under the field name.</summary>
/// <param name="cfsDir">directory the ".f&lt;n&gt;" norm files are opened from when no
/// separate ".s&lt;n&gt;" file exists</param>
private void OpenNorms(Directory cfsDir)
{
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(i);
        if (!info.isIndexed)
        {
            continue;
        }

        // look first if there are separate norms in compound format
        string separateName = segment + ".s" + info.number;
        Directory regularDir = Directory();
        bool hasSeparate = regularDir.FileExists(separateName);

        string fileName = hasSeparate ? separateName : segment + ".f" + info.number;
        Directory sourceDir = hasSeparate ? regularDir : cfsDir;

        norms[info.name] = new Norm(this, sourceDir.OpenInput(fileName), info.number);
    }
}
/// <summary>Closes the input of every norm entry while holding the lock on the
/// table's SyncRoot.</summary>
private void CloseNorms()
{
    lock (norms.SyncRoot)
    {
        foreach (Norm norm in norms.Values)
        {
            norm.in_Renamed.Close();
        }
    }
}
/// <summary>Returns the calling thread's term vectors reader. On a thread's first
/// call a clone of the initial TermVectorsReader is created and stored in the thread
/// data slot.</summary>
/// <returns>the TermVectorsReader stored for the current thread</returns>
private TermVectorsReader GetTermVectorsReader()
{
    object cached = System.Threading.Thread.GetData(termVectorsLocal);
    if (cached != null)
    {
        return (TermVectorsReader) cached;
    }

    TermVectorsReader clone = (TermVectorsReader) termVectorsReaderOrig.Clone();
    System.Threading.Thread.SetData(termVectorsLocal, clone);
    return clone;
}
/// <summary>Return a term frequency vector for the specified document and Field. The
/// vector returned contains term numbers and frequencies for all terms in
/// the specified Field of this document, if the Field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
/// <param name="docNumber">document number</param>
/// <param name="field">field name</param>
/// <returns>the vector, or null when the field is unknown, stores no term vector, or
/// no term vectors reader is available</returns>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Check if this Field is invalid or has no stored term vector
    FieldInfo info = fieldInfos.FieldInfo(field);
    bool vectorStored = info != null && info.storeTermVector && termVectorsReaderOrig != null;
    if (!vectorStored)
    {
        return null;
    }

    TermVectorsReader reader = GetTermVectorsReader();
    return (reader == null) ? null : reader.Get(docNumber, field);
}
/// <summary>Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized Field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized Field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <param name="docNumber">document number</param>
/// <returns>the vectors, or null when no term vectors reader is available</returns>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (termVectorsReaderOrig != null)
    {
        TermVectorsReader reader = GetTermVectorsReader();
        if (reader != null)
        {
            return reader.Get(docNumber);
        }
    }
    return null;
}
/// <summary>Resolves the implementation type from the application setting
/// "Lucene.Net.SegmentReader.class", defaulting to SegmentReader itself.</summary>
static SegmentReader()
{
    try
    {
        System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
        // NOTE: Type.GetType returns null (it does not throw) when the name cannot be
        // resolved; in that case IMPL stays null and Get fails when it instantiates it.
        IMPL = System.Type.GetType(name);
    }
    catch (System.Exception e)
    {
        // Message text is unchanged; the cause is additionally chained so the
        // original exception object and stack trace are not lost.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }
}