2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 using Document
= Lucene
.Net
.Documents
.Document
;
19 using Field
= Lucene
.Net
.Documents
.Field
;
20 using DefaultSimilarity
= Lucene
.Net
.Search
.DefaultSimilarity
;
21 using Directory
= Lucene
.Net
.Store
.Directory
;
22 using IndexInput
= Lucene
.Net
.Store
.IndexInput
;
23 using IndexOutput
= Lucene
.Net
.Store
.IndexOutput
;
24 using BitVector
= Lucene
.Net
.Util
.BitVector
;
26 namespace Lucene
.Net
.Index
29 /// <version> $Id: SegmentReader.cs,v 1.8 2006/11/29 19:25:43 joeshaw Exp $
31 public class SegmentReader
: IndexReader
33 private System
.String segment
;
35 internal FieldInfos fieldInfos
;
36 private FieldsReader fieldsReader
;
38 internal TermInfosReader tis
;
39 internal TermVectorsReader termVectorsReaderOrig
= null;
40 internal System
.LocalDataStoreSlot termVectorsLocal
= System
.Threading
.Thread
.AllocateDataSlot();
42 internal BitVector deletedDocs
= null;
43 private bool deletedDocsDirty
= false;
44 private bool normsDirty
= false;
45 private bool undeleteAll
= false;
47 internal IndexInput freqStream
;
48 internal IndexInput proxStream
;
50 // Compound File Reader when based on a compound file segment
51 internal CompoundFileReader cfsReader
= null;
/// <summary>Read-only access to the <c>fieldInfos</c> field of this reader.</summary>
/// <value>The same instance stored in <c>fieldInfos</c>; no copy is made.</value>
public FieldInfos FieldInfos
{
    get
    {
        return this.fieldInfos;
    }
}
60 private void InitBlock(SegmentReader enclosingInstance
)
62 this.enclosingInstance
= enclosingInstance
;
64 private SegmentReader enclosingInstance
;
65 public SegmentReader Enclosing_Instance
69 return enclosingInstance
;
73 public Norm(SegmentReader enclosingInstance
, IndexInput in_Renamed
, int number
)
75 InitBlock(enclosingInstance
);
76 this.in_Renamed
= in_Renamed
;
80 public IndexInput in_Renamed
;
87 // NOTE: norms are re-written in regular directory, not cfs
88 IndexOutput out_Renamed
= Enclosing_Instance
.Directory().CreateOutput(Enclosing_Instance
.segment
+ ".tmp");
91 out_Renamed
.WriteBytes(bytes
, Enclosing_Instance
.MaxDoc());
97 System
.String fileName
;
98 if (Enclosing_Instance
.cfsReader
== null)
99 fileName
= Enclosing_Instance
.segment
+ ".f" + number
;
102 // use a different file name if we have compound format
103 fileName
= Enclosing_Instance
.segment
+ ".s" + number
;
105 Enclosing_Instance
.Directory().RenameFile(Enclosing_Instance
.segment
+ ".tmp", fileName
);
110 private System
.Collections
.Hashtable norms
= System
.Collections
.Hashtable
.Synchronized(new System
.Collections
.Hashtable());
112 /// <summary>The class which implements SegmentReader. </summary>
113 private static System
.Type IMPL
;
/// <summary>
/// Parameterless constructor. Instances are created reflectively through
/// <c>System.Activator.CreateInstance(IMPL)</c> in the static <c>Get</c> factory,
/// which then configures the instance via <c>Init</c> and <c>Initialize</c>.
/// </summary>
public SegmentReader()
    : base(null) // the base constructor receives null here; real setup is done after construction
{
}
/// <summary>
/// Creates a reader for a single segment, using the segment's own directory.
/// </summary>
/// <param name="si">Segment to open; its <c>dir</c> member supplies the directory.</param>
/// <returns>The reader produced by the five-argument <c>Get</c> overload.</returns>
public static SegmentReader Get(SegmentInfo si)
{
    // No SegmentInfos is supplied, and both flags are passed as false.
    const bool closeDir = false;
    const bool ownDir = false;
    return Get(si.dir, si, null, closeDir, ownDir);
}
/// <summary>
/// Creates a reader for a segment that belongs to the given <c>SegmentInfos</c>.
/// </summary>
/// <param name="sis">Segment collection forwarded to the five-argument overload.</param>
/// <param name="si">Segment to open; its <c>dir</c> member supplies the directory.</param>
/// <param name="closeDir">Forwarded unchanged as the <c>closeDir</c> argument.</param>
/// <returns>The reader produced by the five-argument <c>Get</c> overload.</returns>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    // This entry point always passes ownDir = true.
    const bool ownDir = true;
    return Get(si.dir, si, sis, closeDir, ownDir);
}
129 public static SegmentReader
Get(Directory dir
, SegmentInfo si
, SegmentInfos sis
, bool closeDir
, bool ownDir
)
131 SegmentReader instance
;
134 instance
= (SegmentReader
) System
.Activator
.CreateInstance(IMPL
);
136 catch (System
.Exception e
)
138 throw new System
.SystemException("cannot load SegmentReader class: " + e
);
140 instance
.Init(dir
, sis
, closeDir
, ownDir
);
141 instance
.Initialize(si
);
145 private void Initialize(SegmentInfo si
)
149 // Use compound file directory for some files, if it exists
150 Directory cfsDir
= Directory();
151 if (Directory().FileExists(segment
+ ".cfs"))
153 cfsReader
= new CompoundFileReader(Directory(), segment
+ ".cfs");
157 // No compound file exists - use the multi-file format
158 fieldInfos
= new FieldInfos(cfsDir
, segment
+ ".fnm");
159 fieldsReader
= new FieldsReader(cfsDir
, segment
, fieldInfos
);
161 tis
= new TermInfosReader(cfsDir
, segment
, fieldInfos
);
163 // NOTE: the bitvector is stored using the regular directory, not cfs
164 if (HasDeletions(si
))
165 deletedDocs
= new BitVector(Directory(), segment
+ ".del");
167 // make sure that all index files have been read or are kept open
168 // so that if an index update removes them we'll still have them
169 freqStream
= cfsDir
.OpenInput(segment
+ ".frq");
170 proxStream
= cfsDir
.OpenInput(segment
+ ".prx");
173 if (fieldInfos
.HasVectors())
175 // open term vector files only as needed
176 termVectorsReaderOrig
= new TermVectorsReader(cfsDir
, segment
, fieldInfos
);
180 /* Leaving this here will cause a memory leak under .NET 1.1
183 // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
184 //System.Threading.Thread.SetData(termVectorsLocal, null);
188 protected internal override void DoCommit()
190 if (deletedDocsDirty
)
193 deletedDocs
.Write(Directory(), segment
+ ".tmp");
194 Directory().RenameFile(segment
+ ".tmp", segment
+ ".del");
196 if (undeleteAll
&& Directory().FileExists(segment
+ ".del"))
198 Directory().DeleteFile(segment
+ ".del");
203 System
.Collections
.IEnumerator values
= norms
.Values
.GetEnumerator();
204 while (values
.MoveNext())
206 Norm norm
= (Norm
) values
.Current
;
213 deletedDocsDirty
= false;
218 protected internal override void DoClose()
220 fieldsReader
.Close();
223 if (freqStream
!= null)
225 if (proxStream
!= null)
230 if (termVectorsReaderOrig
!= null)
231 termVectorsReaderOrig
.Close();
233 if (cfsReader
!= null)
/// <summary>Checks whether a deletions file exists for the given segment.</summary>
/// <param name="si">Segment whose directory (<c>si.dir</c>) is probed.</param>
/// <returns>true if a file named <c>si.name + ".del"</c> exists in that directory.</returns>
internal static bool HasDeletions(SegmentInfo si)
{
    System.String deletionsFileName = si.name + ".del";
    return si.dir.FileExists(deletionsFileName);
}
/// <summary>Reports whether this reader currently holds a deleted-documents bit vector.</summary>
/// <returns>true when <c>deletedDocs</c> is non-null, false otherwise.</returns>
public override bool HasDeletions()
{
    if (deletedDocs != null)
    {
        return true;
    }
    return false;
}
/// <summary>Checks whether a compound file exists for the given segment.</summary>
/// <param name="si">Segment whose directory (<c>si.dir</c>) is probed.</param>
/// <returns>true if a file named <c>si.name + ".cfs"</c> exists in that directory.</returns>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    System.String compoundFileName = si.name + ".cfs";
    return si.dir.FileExists(compoundFileName);
}
253 internal static bool HasSeparateNorms(SegmentInfo si
)
255 System
.String
[] result
= si
.dir
.List();
256 System
.String pattern
= si
.name
+ ".s";
257 int patternLength
= pattern
.Length
;
258 for (int i
= 0; i
< result
.Length
; i
++)
260 if (result
[i
].StartsWith(pattern
) && System
.Char
.IsDigit(result
[i
][patternLength
]))
266 protected internal override void DoDelete(int docNum
)
268 if (deletedDocs
== null)
269 deletedDocs
= new BitVector(MaxDoc());
270 deletedDocsDirty
= true;
272 deletedDocs
.Set(docNum
);
275 protected internal override void DoUndeleteAll()
278 deletedDocsDirty
= false;
282 internal virtual System
.Collections
.ArrayList
Files()
284 System
.Collections
.ArrayList files
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(16));
286 for (int i
= 0; i
< IndexFileNames
.INDEX_EXTENSIONS
.Length
; i
++)
288 System
.String name
= segment
+ "." + IndexFileNames
.INDEX_EXTENSIONS
[i
];
289 if (Directory().FileExists(name
))
293 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
295 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
296 if (fi
.isIndexed
&& !fi
.omitNorms
)
299 if (cfsReader
== null)
300 name
= segment
+ ".f" + i
;
302 name
= segment
+ ".s" + i
;
303 if (Directory().FileExists(name
))
310 public override TermEnum
Terms()
315 public override TermEnum
Terms(Term t
)
320 public override Document
Document(int n
)
325 throw new System
.ArgumentException("attempt to access a deleted document");
326 return fieldsReader
.Doc(n
);
330 public override bool IsDeleted(int n
)
334 return (deletedDocs
!= null && deletedDocs
.Get(n
));
/// <summary>Creates a new term-documents enumerator bound to this reader.</summary>
/// <returns>A fresh <c>SegmentTermDocs</c> constructed with this instance.</returns>
public override TermDocs TermDocs()
{
    SegmentTermDocs segmentTermDocs = new SegmentTermDocs(this);
    return segmentTermDocs;
}
/// <summary>Creates a new term-positions enumerator bound to this reader.</summary>
/// <returns>A fresh <c>SegmentTermPositions</c> constructed with this instance.</returns>
public override TermPositions TermPositions()
{
    SegmentTermPositions segmentTermPositions = new SegmentTermPositions(this);
    return segmentTermPositions;
}
348 public override int DocFreq(Term t
)
350 TermInfo ti
= tis
.Get(t
);
357 public override int NumDocs()
360 if (deletedDocs
!= null)
361 n
-= deletedDocs
.Count();
/// <summary>Returns the document count reported by the stored-fields reader.</summary>
/// <returns>The value of <c>fieldsReader.Size()</c>.</returns>
public override int MaxDoc()
{
    int storedDocCount = fieldsReader.Size();
    return storedDocCount;
}
370 /// <seealso cref="IndexReader.GetFieldNames()">
372 /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
374 public override System
.Collections
.ICollection
GetFieldNames()
376 // maintain a unique set of field names
377 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
378 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
380 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
381 fieldSet
.Add(fi
.name
, fi
.name
);
386 /// <seealso cref="IndexReader.GetFieldNames(bool)">
388 /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
390 public override System
.Collections
.ICollection
GetFieldNames(bool indexed
)
392 // maintain a unique set of field names
393 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
394 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
396 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
397 if (fi
.isIndexed
== indexed
)
398 fieldSet
.Add(fi
.name
, fi
.name
);
403 /// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector tvSpec)">
405 /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
407 public override System
.Collections
.ICollection
GetIndexedFieldNames(Field
.TermVector tvSpec
)
409 bool storedTermVector
;
410 bool storePositionWithTermVector
;
411 bool storeOffsetWithTermVector
;
413 if (tvSpec
== Field
.TermVector
.NO
)
415 storedTermVector
= false;
416 storePositionWithTermVector
= false;
417 storeOffsetWithTermVector
= false;
419 else if (tvSpec
== Field
.TermVector
.YES
)
421 storedTermVector
= true;
422 storePositionWithTermVector
= false;
423 storeOffsetWithTermVector
= false;
425 else if (tvSpec
== Field
.TermVector
.WITH_POSITIONS
)
427 storedTermVector
= true;
428 storePositionWithTermVector
= true;
429 storeOffsetWithTermVector
= false;
431 else if (tvSpec
== Field
.TermVector
.WITH_OFFSETS
)
433 storedTermVector
= true;
434 storePositionWithTermVector
= false;
435 storeOffsetWithTermVector
= true;
437 else if (tvSpec
== Field
.TermVector
.WITH_POSITIONS_OFFSETS
)
439 storedTermVector
= true;
440 storePositionWithTermVector
= true;
441 storeOffsetWithTermVector
= true;
445 throw new System
.ArgumentException("unknown termVector parameter " + tvSpec
);
448 // maintain a unique set of field names
449 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
450 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
452 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
453 if (fi
.isIndexed
&& fi
.storeTermVector
== storedTermVector
&& fi
.storePositionWithTermVector
== storePositionWithTermVector
&& fi
.storeOffsetWithTermVector
== storeOffsetWithTermVector
)
455 fieldSet
.Add(fi
.name
, fi
.name
);
461 /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
463 public override System
.Collections
.ICollection
GetFieldNames(IndexReader
.FieldOption fieldOption
)
465 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
466 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
468 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
469 if (fieldOption
== IndexReader
.FieldOption
.ALL
)
471 fieldSet
.Add(fi
.name
, fi
.name
);
473 else if (!fi
.isIndexed
&& fieldOption
== IndexReader
.FieldOption
.UNINDEXED
)
475 fieldSet
.Add(fi
.name
, fi
.name
);
477 else if (fi
.isIndexed
&& fieldOption
== IndexReader
.FieldOption
.INDEXED
)
479 fieldSet
.Add(fi
.name
, fi
.name
);
481 else if (fi
.isIndexed
&& fi
.storeTermVector
== false && fieldOption
== IndexReader
.FieldOption
.INDEXED_NO_TERMVECTOR
)
483 fieldSet
.Add(fi
.name
, fi
.name
);
485 else if (fi
.storeTermVector
== true && fi
.storePositionWithTermVector
== false && fi
.storeOffsetWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR
)
487 fieldSet
.Add(fi
.name
, fi
.name
);
489 else if (fi
.isIndexed
&& fi
.storeTermVector
&& fieldOption
== IndexReader
.FieldOption
.INDEXED_WITH_TERMVECTOR
)
491 fieldSet
.Add(fi
.name
, fi
.name
);
493 else if (fi
.storePositionWithTermVector
&& fi
.storeOffsetWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_POSITION
)
495 fieldSet
.Add(fi
.name
, fi
.name
);
497 else if (fi
.storeOffsetWithTermVector
&& fi
.storePositionWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_OFFSET
)
499 fieldSet
.Add(fi
.name
, fi
.name
);
501 else if ((fi
.storeOffsetWithTermVector
&& fi
.storePositionWithTermVector
) && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_POSITION_OFFSET
)
503 fieldSet
.Add(fi
.name
, fi
.name
);
510 public override bool HasNorms(System
.String field
)
514 return norms
.ContainsKey(field
);
518 internal static byte[] CreateFakeNorms(int size
)
520 byte[] ones
= new byte[size
];
521 byte val
= DefaultSimilarity
.EncodeNorm(1.0f
);
522 for (int index
= 0; index
< size
; index
++)
529 private byte[] FakeNorms()
532 ones
= CreateFakeNorms(MaxDoc());
536 // can return null if norms aren't stored
537 protected internal virtual byte[] GetNorms(System
.String field
)
541 Norm norm
= (Norm
) norms
[field
];
543 return null; // not indexed, or norms not stored
545 if (norm
.bytes
== null)
547 // value not yet read
548 byte[] bytes
= new byte[MaxDoc()];
549 Norms(field
, bytes
, 0);
550 norm
.bytes
= bytes
; // cache it
556 // returns fake norms if norms aren't available
557 public override byte[] Norms(System
.String field
)
561 byte[] bytes
= GetNorms(field
);
568 protected internal override void DoSetNorm(int doc
, System
.String field
, byte value_Renamed
)
570 Norm norm
= (Norm
) norms
[field
];
572 // not an indexed field
574 norm
.dirty
= true; // mark it dirty
577 Norms(field
)[doc
] = value_Renamed
; // set the value
580 /// <summary>Read norms into a pre-allocated array. </summary>
581 public override void Norms(System
.String field
, byte[] bytes
, int offset
)
586 Norm norm
= (Norm
) norms
[field
];
589 Array
.Copy(FakeNorms(), 0, bytes
, offset
, MaxDoc());
593 if (norm
.bytes
!= null)
595 // can copy from cache
596 Array
.Copy(norm
.bytes
, 0, bytes
, offset
, MaxDoc());
600 IndexInput normStream
= (IndexInput
) norm
.in_Renamed
.Clone();
605 normStream
.ReadBytes(bytes
, offset
, MaxDoc());
615 private void OpenNorms(Directory cfsDir
)
617 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
619 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
620 if (fi
.isIndexed
&& !fi
.omitNorms
)
622 // look first if there are separate norms in compound format
623 System
.String fileName
= segment
+ ".s" + fi
.number
;
624 Directory d
= Directory();
625 if (!d
.FileExists(fileName
))
627 fileName
= segment
+ ".f" + fi
.number
;
630 norms
[fi
.name
] = new Norm(this, d
.OpenInput(fileName
), fi
.number
);
635 private void CloseNorms()
637 lock (norms
.SyncRoot
)
639 System
.Collections
.IEnumerator enumerator
= norms
.Values
.GetEnumerator();
640 while (enumerator
.MoveNext())
642 Norm norm
= (Norm
) enumerator
.Current
;
643 norm
.in_Renamed
.Close();
648 /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
649 /// <returns> TermVectorsReader
651 private TermVectorsReader
GetTermVectorsReader()
653 TermVectorsReader tvReader
= (TermVectorsReader
) System
.Threading
.Thread
.GetData(termVectorsLocal
);
654 if (tvReader
== null)
656 tvReader
= (TermVectorsReader
) termVectorsReaderOrig
.Clone();
657 System
.Threading
.Thread
.SetData(termVectorsLocal
, tvReader
);
662 /// <summary>Return a term frequency vector for the specified document and field. The
663 /// vector returned contains term numbers and frequencies for all terms in
664 /// the specified field of this document, if the field had storeTermVector
665 /// flag set. If the flag was not set, the method returns null.
667 /// <throws> IOException </throws>
668 public override TermFreqVector
GetTermFreqVector(int docNumber
, System
.String field
)
670 // Check if this field is invalid or has no stored term vector
671 FieldInfo fi
= fieldInfos
.FieldInfo(field
);
672 if (fi
== null || !fi
.storeTermVector
|| termVectorsReaderOrig
== null)
675 TermVectorsReader termVectorsReader
= GetTermVectorsReader();
676 if (termVectorsReader
== null)
679 return termVectorsReader
.Get(docNumber
, field
);
683 /// <summary>Return an array of term frequency vectors for the specified document.
684 /// The array contains a vector for each vectorized field in the document.
685 /// Each vector contains term numbers and frequencies for all terms
686 /// in a given vectorized field.
687 /// If no such fields existed, the method returns null.
689 /// <throws> IOException </throws>
690 public override TermFreqVector
[] GetTermFreqVectors(int docNumber
)
692 if (termVectorsReaderOrig
== null)
695 TermVectorsReader termVectorsReader
= GetTermVectorsReader();
696 if (termVectorsReader
== null)
699 return termVectorsReader
.Get(docNumber
);
702 static SegmentReader()
707 System
.String name
= SupportClass
.AppSettings
.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader
).FullName
);
708 IMPL
= System
.Type
.GetType(name
);
710 catch (System
.Security
.SecurityException
)
714 IMPL
= System
.Type
.GetType(typeof(SegmentReader
).FullName
);
716 catch (System
.Exception e
)
718 throw new System
.SystemException("cannot load default SegmentReader class: " + e
);
721 catch (System
.Exception e
)
723 throw new System
.SystemException("cannot load SegmentReader class: " + e
);