2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Field
= Lucene
.Net
.Documents
.Field
;
19 using Directory
= Lucene
.Net
.Store
.Directory
;
20 using IndexInput
= Lucene
.Net
.Store
.IndexInput
;
21 using IndexOutput
= Lucene
.Net
.Store
.IndexOutput
;
22 using BitVector
= Lucene
.Net
.Util
.BitVector
;
23 namespace Lucene
.Net
.Index
26 /// <summary> An <code>IndexReader</code> implementation that reads a single index segment.
29 /// <version> $Id: SegmentReader.cs,v 1.6 2006/04/26 19:19:53 joeshaw Exp $
31 class SegmentReader
: IndexReader
33 private System
.String segment
;
35 internal FieldInfos fieldInfos
;
36 private FieldsReader fieldsReader
;
38 internal TermInfosReader tis
;
39 internal TermVectorsReader termVectorsReaderOrig
= null;
40 internal System
.LocalDataStoreSlot termVectorsLocal
= System
.Threading
.Thread
.AllocateDataSlot();
42 internal BitVector deletedDocs
= null;
43 private bool deletedDocsDirty
= false;
44 private bool normsDirty
= false;
45 private bool undeleteAll
= false;
47 internal IndexInput freqStream
;
48 internal IndexInput proxStream
;
50 // Compound File Reader when based on a compound file segment
51 internal CompoundFileReader cfsReader
= null;
55 private void InitBlock(SegmentReader enclosingInstance
)
57 this.enclosingInstance
= enclosingInstance
;
59 private SegmentReader enclosingInstance
;
60 public SegmentReader Enclosing_Instance
64 return enclosingInstance
;
68 public Norm(SegmentReader enclosingInstance
, IndexInput in_Renamed
, int number
)
70 InitBlock(enclosingInstance
);
71 this.in_Renamed
= in_Renamed
;
75 public IndexInput in_Renamed
; // private -> public
76 public byte[] bytes
; // private -> public
77 public bool dirty
; // private -> public
78 public int number
; // private -> public
/// <summary>Writes this norm's cached <code>bytes</code> (MaxDoc() of them) to
/// "&lt;segment&gt;.tmp" in the enclosing reader's directory and then renames that
/// file to the norm file name chosen below.</summary>
// NOTE(review): this listing shows no Close() on out_Renamed between the write and the
// rename, and no try/finally around the write - confirm the stream is closed (ideally in
// a finally block) before RenameFile is called.
80 public void ReWrite() // private -> public
82 // NOTE: norms are re-written in regular directory, not cfs
83 IndexOutput out_Renamed
= Enclosing_Instance
.Directory().CreateOutput(Enclosing_Instance
.segment
+ ".tmp");
86 out_Renamed
.WriteBytes(bytes
, Enclosing_Instance
.MaxDoc());
// Target file name: "<segment>.f<number>" when the enclosing reader has no compound
// file reader.
92 System
.String fileName
;
93 if (Enclosing_Instance
.cfsReader
== null)
94 fileName
= Enclosing_Instance
.segment
+ ".f" + number
;
// NOTE(review): no `else` is visible in this listing before the assignment below;
// presumably the ".s<number>" name is used only when cfsReader is non-null - confirm.
97 // use a different file name if we have compound format
98 fileName
= Enclosing_Instance
.segment
+ ".s" + number
;
// Replace the target file with the freshly written temporary file.
100 Enclosing_Instance
.Directory().RenameFile(Enclosing_Instance
.segment
+ ".tmp", fileName
);
105 private System
.Collections
.Hashtable norms
= System
.Collections
.Hashtable
.Synchronized(new System
.Collections
.Hashtable());
107 /// <summary>The class which implements SegmentReader. </summary>
108 private static System
.Type IMPL
;
110 public SegmentReader() : base(null)
/// <summary>Creates a reader for <code>si</code> using the segment's own directory
/// (<code>si.dir</code>), passing no <code>SegmentInfos</code> and <code>false</code>
/// for both <code>closeDir</code> and <code>ownDir</code>.</summary>
/// <param name="si">Descriptor of the segment to open.</param>
/// <returns>The reader produced by the five-argument <code>Get</code> overload.</returns>
public static SegmentReader Get(SegmentInfo si)
{
    SegmentReader reader = Get(si.dir, si, null, false, false);
    return reader;
}
/// <summary>Creates a reader for <code>si</code> using the segment's own directory
/// (<code>si.dir</code>), forwarding <code>sis</code> and <code>closeDir</code> and
/// passing <code>true</code> for <code>ownDir</code>.</summary>
/// <param name="sis">Segment infos forwarded to the five-argument overload.</param>
/// <param name="si">Descriptor of the segment to open.</param>
/// <param name="closeDir">Forwarded unchanged as the <code>closeDir</code> argument.</param>
/// <returns>The reader produced by the five-argument <code>Get</code> overload.</returns>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    // This overload always asks for ownDir = true.
    bool ownsDirectory = true;
    return Get(si.dir, si, sis, closeDir, ownsDirectory);
}
/// <summary>Instantiates the SegmentReader implementation type held in <code>IMPL</code>
/// via <code>System.Activator.CreateInstance</code>, then calls
/// <code>Init(dir, sis, closeDir, ownDir)</code> and <code>Initialize(si)</code> on it.</summary>
// Any exception caught while creating the instance is rethrown as a System.SystemException
// whose message embeds the original exception's ToString() text.
// NOTE(review): the `try` keyword and the return statement are not visible in this
// listing; presumably the initialized instance is returned - confirm.
124 public static SegmentReader
Get(Directory dir
, SegmentInfo si
, SegmentInfos sis
, bool closeDir
, bool ownDir
)
126 SegmentReader instance
;
// Reflection-based construction so that the concrete reader class can be swapped
// through the IMPL type.
129 instance
= (SegmentReader
) System
.Activator
.CreateInstance(IMPL
);
131 catch (System
.Exception e
)
133 throw new System
.SystemException("cannot load SegmentReader class: " + e
.ToString());
// Two-step initialization: Init with the directory arguments first, then the
// segment-specific Initialize.
135 instance
.Init(dir
, sis
, closeDir
, ownDir
);
136 instance
.Initialize(si
);
140 private void Initialize(SegmentInfo si
)
144 // Use compound file directory for some files, if it exists
145 Directory cfsDir
= Directory();
146 if (Directory().FileExists(segment
+ ".cfs"))
148 cfsReader
= new CompoundFileReader(Directory(), segment
+ ".cfs");
152 // No compound file exists - use the multi-file format
153 fieldInfos
= new FieldInfos(cfsDir
, segment
+ ".fnm");
154 fieldsReader
= new FieldsReader(cfsDir
, segment
, fieldInfos
);
156 tis
= new TermInfosReader(cfsDir
, segment
, fieldInfos
);
158 // NOTE: the bitvector is stored using the regular directory, not cfs
159 if (HasDeletions(si
))
160 deletedDocs
= new BitVector(Directory(), segment
+ ".del");
162 // make sure that all index files have been read or are kept open
163 // so that if an index update removes them we'll still have them
164 freqStream
= cfsDir
.OpenInput(segment
+ ".frq");
165 proxStream
= cfsDir
.OpenInput(segment
+ ".prx");
168 if (fieldInfos
.HasVectors())
170 // open term vector files only as needed
171 termVectorsReaderOrig
= new TermVectorsReader(cfsDir
, segment
, fieldInfos
);
177 // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
180 System
.Threading
.Thread
.SetData(termVectorsLocal
, null); // {{Aroush-1.9}} is this required for .NET ?!
184 // System.Console.WriteLine(ex.Message);
188 protected internal override void DoCommit()
190 if (deletedDocsDirty
)
193 deletedDocs
.Write(Directory(), segment
+ ".tmp");
194 Directory().RenameFile(segment
+ ".tmp", segment
+ ".del");
196 if (undeleteAll
&& Directory().FileExists(segment
+ ".del"))
198 Directory().DeleteFile(segment
+ ".del");
203 System
.Collections
.IEnumerator values
= norms
.Values
.GetEnumerator();
204 while (values
.MoveNext())
206 Norm norm
= (Norm
) values
.Current
;
213 deletedDocsDirty
= false;
218 protected internal override void DoClose()
220 fieldsReader
.Close();
223 if (freqStream
!= null)
225 if (proxStream
!= null)
230 if (termVectorsReaderOrig
!= null)
231 termVectorsReaderOrig
.Close();
233 if (cfsReader
!= null)
/// <summary>Reports whether a "&lt;name&gt;.del" file exists in the segment's directory.</summary>
/// <param name="si">Segment descriptor supplying the directory (<code>dir</code>) and
/// the segment name (<code>name</code>).</param>
/// <returns>The result of <code>FileExists</code> for the ".del" file name.</returns>
internal static bool HasDeletions(SegmentInfo si)
{
    System.String deletionsFileName = si.name + ".del";
    return si.dir.FileExists(deletionsFileName);
}
/// <summary>Reports whether this reader currently holds a deleted-documents bit vector.</summary>
/// <returns><code>true</code> when <code>deletedDocs</code> is not null; otherwise <code>false</code>.</returns>
public override bool HasDeletions()
{
    if (deletedDocs != null)
    {
        return true;
    }
    return false;
}
/// <summary>Reports whether a "&lt;name&gt;.cfs" file exists in the segment's directory.</summary>
/// <param name="si">Segment descriptor supplying the directory (<code>dir</code>) and
/// the segment name (<code>name</code>).</param>
/// <returns>The result of <code>FileExists</code> for the ".cfs" file name.</returns>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    System.String compoundFileName = si.name + ".cfs";
    return si.dir.FileExists(compoundFileName);
}
/// <summary>Scans the listing of the segment's directory for file names that start with
/// <code>si.name + ".f"</code> and have a digit immediately after that prefix.</summary>
// NOTE(review): the statement executed on a match and the final return are not visible
// in this listing; presumably a match yields true and no match yields false - confirm.
253 internal static bool HasSeparateNorms(SegmentInfo si
)
// assumes List() never returns null - TODO confirm against the Directory implementation
255 System
.String
[] result
= si
.dir
.List();
256 System
.String pattern
= si
.name
+ ".f";
257 int patternLength
= pattern
.Length
;
258 for (int i
= 0; i
< result
.Length
; i
++)
// NOTE(review): result[i][patternLength] is evaluated whenever StartsWith succeeds. A
// file name exactly equal to pattern (length == patternLength) would be indexed one
// past its end and throw IndexOutOfRangeException - consider checking
// result[i].Length > patternLength before reading the character.
260 if (result
[i
].StartsWith(pattern
) && System
.Char
.IsDigit(result
[i
][patternLength
]))
/// <summary>Marks document <code>docNum</code> as deleted in the in-memory
/// <code>deletedDocs</code> bit vector and flags the deletions as dirty.</summary>
// NOTE(review): the embedded numbering skips a line between the dirty flag and the
// Set call, so a statement may be missing from this listing - confirm against the full file.
266 protected internal override void DoDelete(int docNum
)
// Allocate the bit vector on first deletion, sized to MaxDoc().
268 if (deletedDocs
== null)
269 deletedDocs
= new BitVector(MaxDoc());
270 deletedDocsDirty
= true;
272 deletedDocs
.Set(docNum
);
275 protected internal override void DoUndeleteAll()
278 deletedDocsDirty
= false;
282 internal virtual System
.Collections
.ArrayList
Files()
284 System
.Collections
.ArrayList files
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(16));
285 System
.String
[] ext
= new System
.String
[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"}
;
287 for (int i
= 0; i
< ext
.Length
; i
++)
289 System
.String name
= segment
+ "." + ext
[i
];
290 if (Directory().FileExists(name
))
294 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
296 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
300 if (cfsReader
== null)
301 name
= segment
+ ".f" + i
;
303 name
= segment
+ ".s" + i
;
304 if (Directory().FileExists(name
))
311 public override TermEnum
Terms()
316 public override TermEnum
Terms(Term t
)
321 public override Document
Document(int n
)
326 throw new System
.ArgumentException("attempt to access a deleted document");
327 return fieldsReader
.Doc(n
);
331 public override bool IsDeleted(int n
)
335 return (deletedDocs
!= null && deletedDocs
.Get(n
));
/// <summary>Creates a new <code>SegmentTermDocs</code> bound to this reader.</summary>
/// <returns>A freshly constructed <code>SegmentTermDocs</code>.</returns>
public override TermDocs TermDocs()
{
    SegmentTermDocs segmentTermDocs = new SegmentTermDocs(this);
    return segmentTermDocs;
}
/// <summary>Creates a new <code>SegmentTermPositions</code> bound to this reader.</summary>
/// <returns>A freshly constructed <code>SegmentTermPositions</code>.</returns>
public override TermPositions TermPositions()
{
    SegmentTermPositions segmentTermPositions = new SegmentTermPositions(this);
    return segmentTermPositions;
}
349 public override int DocFreq(Term t
)
351 TermInfo ti
= tis
.Get(t
);
358 public override int NumDocs()
361 if (deletedDocs
!= null)
362 n
-= deletedDocs
.Count();
/// <summary>Returns the size reported by this reader's <code>fieldsReader</code>.</summary>
/// <returns>The value of <code>fieldsReader.Size()</code>.</returns>
public override int MaxDoc()
{
    int documentCount = fieldsReader.Size();
    return documentCount;
}
371 /// <seealso cref="IndexReader.GetFieldNames()">
373 /// <deprecated> Replaced by <see cref="GetFieldNames(IndexReader.FieldOption)"/>
375 public override System
.Collections
.ICollection
GetFieldNames()
377 // maintain a unique set of Field names
378 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
379 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
381 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
382 fieldSet
.Add(fi
.name
, fi
.name
);
387 /// <seealso cref="IndexReader.GetFieldNames(bool)">
389 /// <deprecated> Replaced by <see cref="GetFieldNames(IndexReader.FieldOption)"/>
391 public override System
.Collections
.ICollection
GetFieldNames(bool indexed
)
393 // maintain a unique set of Field names
394 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
395 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
397 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
398 if (fi
.isIndexed
== indexed
)
399 fieldSet
.Add(fi
.name
, fi
.name
);
404 /// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector)">
406 /// <deprecated> Replaced by <see cref="GetFieldNames(IndexReader.FieldOption)"/>
408 public override System
.Collections
.ICollection
GetIndexedFieldNames(Field
.TermVector tvSpec
)
410 bool storedTermVector
;
411 bool storePositionWithTermVector
;
412 bool storeOffsetWithTermVector
;
414 if (tvSpec
== Field
.TermVector
.NO
)
416 storedTermVector
= false;
417 storePositionWithTermVector
= false;
418 storeOffsetWithTermVector
= false;
420 else if (tvSpec
== Field
.TermVector
.YES
)
422 storedTermVector
= true;
423 storePositionWithTermVector
= false;
424 storeOffsetWithTermVector
= false;
426 else if (tvSpec
== Field
.TermVector
.WITH_POSITIONS
)
428 storedTermVector
= true;
429 storePositionWithTermVector
= true;
430 storeOffsetWithTermVector
= false;
432 else if (tvSpec
== Field
.TermVector
.WITH_OFFSETS
)
434 storedTermVector
= true;
435 storePositionWithTermVector
= false;
436 storeOffsetWithTermVector
= true;
438 else if (tvSpec
== Field
.TermVector
.WITH_POSITIONS_OFFSETS
)
440 storedTermVector
= true;
441 storePositionWithTermVector
= true;
442 storeOffsetWithTermVector
= true;
446 throw new System
.ArgumentException("unknown termVector parameter " + tvSpec
);
449 // maintain a unique set of Field names
450 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
451 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
453 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
454 if (fi
.isIndexed
&& fi
.storeTermVector
== storedTermVector
&& fi
.storePositionWithTermVector
== storePositionWithTermVector
&& fi
.storeOffsetWithTermVector
== storeOffsetWithTermVector
)
456 fieldSet
.Add(fi
.name
, fi
.name
);
462 /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)">
464 public override System
.Collections
.ICollection
GetFieldNames(IndexReader
.FieldOption fieldOption
)
466 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
467 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
469 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
470 if (fieldOption
== IndexReader
.FieldOption
.ALL
)
472 fieldSet
.Add(fi
.name
, fi
.name
);
474 else if (!fi
.isIndexed
&& fieldOption
== IndexReader
.FieldOption
.UNINDEXED
)
476 fieldSet
.Add(fi
.name
, fi
.name
);
478 else if (fi
.isIndexed
&& fieldOption
== IndexReader
.FieldOption
.INDEXED
)
480 fieldSet
.Add(fi
.name
, fi
.name
);
482 else if (fi
.isIndexed
&& fi
.storeTermVector
== false && fieldOption
== IndexReader
.FieldOption
.INDEXED_NO_TERMVECTOR
)
484 fieldSet
.Add(fi
.name
, fi
.name
);
486 else if (fi
.storeTermVector
== true && fi
.storePositionWithTermVector
== false && fi
.storeOffsetWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR
)
488 fieldSet
.Add(fi
.name
, fi
.name
);
490 else if (fi
.isIndexed
&& fi
.storeTermVector
&& fieldOption
== IndexReader
.FieldOption
.INDEXED_WITH_TERMVECTOR
)
492 fieldSet
.Add(fi
.name
, fi
.name
);
494 else if (fi
.storePositionWithTermVector
&& fi
.storeOffsetWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_POSITION
)
496 fieldSet
.Add(fi
.name
, fi
.name
);
498 else if (fi
.storeOffsetWithTermVector
&& fi
.storePositionWithTermVector
== false && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_OFFSET
)
500 fieldSet
.Add(fi
.name
, fi
.name
);
502 else if ((fi
.storeOffsetWithTermVector
&& fi
.storePositionWithTermVector
) && fieldOption
== IndexReader
.FieldOption
.TERMVECTOR_WITH_POSITION_OFFSET
)
504 fieldSet
.Add(fi
.name
, fi
.name
);
510 /// <seealso cref="IndexReader.Norms(System.String)">
512 public override byte[] Norms(System
.String field
)
516 Norm norm
= (Norm
) norms
[field
];
518 // not an indexed Field
520 if (norm
.bytes
== null)
522 // value not yet read
523 byte[] bytes
= new byte[MaxDoc()];
524 Norms(field
, bytes
, 0);
525 norm
.bytes
= bytes
; // cache it
531 protected internal override void DoSetNorm(int doc
, System
.String field
, byte value_Renamed
)
533 Norm norm
= (Norm
) norms
[field
];
535 // not an indexed Field
537 norm
.dirty
= true; // mark it dirty
540 Norms(field
)[doc
] = value_Renamed
; // set the value
543 /// <summary>Read norms into a pre-allocated array. </summary>
544 public override void Norms(System
.String field
, byte[] bytes
, int offset
)
549 Norm norm
= (Norm
) norms
[field
];
551 return ; // use zeros in array
553 if (norm
.bytes
!= null)
555 // can copy from cache
556 Array
.Copy(norm
.bytes
, 0, bytes
, offset
, MaxDoc());
560 IndexInput normStream
= (IndexInput
) norm
.in_Renamed
.Clone();
565 normStream
.ReadBytes(bytes
, offset
, MaxDoc());
574 private void OpenNorms(Directory cfsDir
)
576 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
578 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
581 // look first if there are separate norms in compound format
582 System
.String fileName
= segment
+ ".s" + fi
.number
;
583 Directory d
= Directory();
584 if (!d
.FileExists(fileName
))
586 fileName
= segment
+ ".f" + fi
.number
;
589 norms
[fi
.name
] = new Norm(this, d
.OpenInput(fileName
), fi
.number
);
/// <summary>Closes the input stream (<code>in_Renamed</code>) of every <code>Norm</code>
/// stored in the <code>norms</code> table.</summary>
594 private void CloseNorms()
// The whole enumeration runs under the table's SyncRoot lock so the table is not
// modified by another thread while its values are being walked.
596 lock (norms
.SyncRoot
)
598 System
.Collections
.IEnumerator enumerator
= norms
.Values
.GetEnumerator();
599 while (enumerator
.MoveNext())
601 Norm norm
= (Norm
) enumerator
.Current
;
602 norm
.in_Renamed
.Close();
607 /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
608 /// <returns> TermVectorsReader
// The calling thread's reader is kept in the thread data slot termVectorsLocal; a clone
// of termVectorsReaderOrig is created only when that slot is still empty for this thread.
// termVectorsReaderOrig is dereferenced without a null check here; the visible callers
// (GetTermFreqVector / GetTermFreqVectors) test it for null before calling.
// NOTE(review): the return statement is not visible in this listing; presumably
// tvReader is returned - confirm.
610 private TermVectorsReader
GetTermVectorsReader()
612 TermVectorsReader tvReader
= (TermVectorsReader
) System
.Threading
.Thread
.GetData(termVectorsLocal
);
613 if (tvReader
== null)
615 tvReader
= (TermVectorsReader
) termVectorsReaderOrig
.Clone();
// Remember the clone in this thread's slot so later calls on the same thread reuse it.
616 System
.Threading
.Thread
.SetData(termVectorsLocal
, tvReader
);
621 /// <summary>Return a term frequency vector for the specified document and Field. The
622 /// vector returned contains term numbers and frequencies for all terms in
623 /// the specified Field of this document, if the Field had storeTermVector
624 /// flag set. If the flag was not set, the method returns null.
626 /// <throws> IOException </throws>
627 public override TermFreqVector
GetTermFreqVector(int docNumber
, System
.String field
)
629 // Check if this Field is invalid or has no stored term vector
630 FieldInfo fi
= fieldInfos
.FieldInfo(field
);
631 if (fi
== null || !fi
.storeTermVector
|| termVectorsReaderOrig
== null)
634 TermVectorsReader termVectorsReader
= GetTermVectorsReader();
635 if (termVectorsReader
== null)
638 return termVectorsReader
.Get(docNumber
, field
);
642 /// <summary>Return an array of term frequency vectors for the specified document.
643 /// The array contains a vector for each vectorized Field in the document.
644 /// Each vector vector contains term numbers and frequencies for all terms
645 /// in a given vectorized Field.
646 /// If no such fields existed, the method returns null.
648 /// <throws> IOException </throws>
649 public override TermFreqVector
[] GetTermFreqVectors(int docNumber
)
651 if (termVectorsReaderOrig
== null)
654 TermVectorsReader termVectorsReader
= GetTermVectorsReader();
655 if (termVectorsReader
== null)
658 return termVectorsReader
.Get(docNumber
);
/// <summary>Type initializer: resolves the SegmentReader implementation type and stores
/// it in <code>IMPL</code>.</summary>
// The type name is read from the application setting "Lucene.Net.SegmentReader.class";
// the second argument (this class's full name) is presumably the default used when the
// setting is absent - verify against SupportClass.AppSettings.Get.
// Any exception caught here is rethrown as a System.SystemException whose message embeds
// the original exception's ToString() text.
// NOTE(review): the `try` keyword is not visible in this listing.
661 static SegmentReader()
666 System
.String name
= SupportClass
.AppSettings
.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader
).FullName
);
// NOTE(review): System.Type.GetType(string) returns null (it does not throw) when the
// named type cannot be found, so a bad setting would leave IMPL null and only surface
// later, when Get calls Activator.CreateInstance(IMPL) - consider validating here.
667 IMPL
= System
.Type
.GetType(name
);
669 catch (System
.Exception e
)
671 throw new System
.SystemException("cannot load SegmentReader class: " + e
.ToString());