2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Field
= Lucene
.Net
.Documents
.Field
;
19 using Directory
= Lucene
.Net
.Store
.Directory
;
20 using IndexInput
= Lucene
.Net
.Store
.IndexInput
;
21 using IndexOutput
= Lucene
.Net
.Store
.IndexOutput
;
22 using BitVector
= Lucene
.Net
.Util
.BitVector
;
23 namespace Lucene
.Net
.Index
/// <summary>
/// An IndexReader over the files of a single index segment.
/// </summary>
/// <version> $Id: SegmentReader.cs,v 1.5 2006/04/03 17:12:17 joeshaw Exp $ </version>
31 class SegmentReader
: IndexReader
// Segment name; every file this reader touches is named segment + "." + extension.
private System.String segment;

internal FieldInfos fieldInfos;
private FieldsReader fieldsReader;

internal TermInfosReader tis;

// Term vector reader opened in Initialize only when fieldInfos.HasVectors().
internal TermVectorsReader termVectorsReaderOrig = null;

// NOTE(review): the listing skips its lines 40-41 here; any comment or
// attribute that preceded tvReader is not visible - confirm against upstream.
// Clone of termVectorsReaderOrig, created on first use by GetTermVectorsReader().
private TermVectorsReader tvReader;

// Null means "no deletions" (see HasDeletions()).
internal BitVector deletedDocs = null;
private bool deletedDocsDirty = false;
private bool normsDirty = false;
private bool undeleteAll = false;

internal IndexInput freqStream;
internal IndexInput proxStream;

// Compound File Reader when based on a compound file segment
internal CompoundFileReader cfsReader = null;
// Remembers the owning SegmentReader. Called from the Norm constructor, so
// these members presumably belong to the nested Norm type (its header line is
// not present in the listing - confirm).
private void InitBlock(SegmentReader enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}

private SegmentReader enclosingInstance;

/// <summary>The SegmentReader that was handed to InitBlock.</summary>
public SegmentReader Enclosing_Instance
{
    get
    {
        return enclosingInstance;
    }
}
/// <summary>Creates the norm entry for one field.</summary>
/// <param name="enclosingInstance">The owning SegmentReader.</param>
/// <param name="in_Renamed">Input the norm bytes are read from.</param>
/// <param name="number">Field number; used to build the norm file name in ReWrite().</param>
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
{
    InitBlock(enclosingInstance);
    this.in_Renamed = in_Renamed;
    // NOTE(review): the listing drops its line 74. The 'number' parameter is
    // otherwise unused and the 'number' field is read by ReWrite(), so this
    // assignment is restored by inference - confirm against upstream.
    this.number = number;
}

public IndexInput in_Renamed; // private -> public
public byte[] bytes; // private -> public
public bool dirty; // private -> public
public int number; // private -> public
// Writes the cached norm bytes to "<segment>.tmp" and then renames that file
// to this field's norm file ("<segment>.f<number>" or "<segment>.s<number>").
82 public void ReWrite() // private -> public
84 // NOTE: norms are re-written in regular directory, not cfs
85 IndexOutput out_Renamed
= Enclosing_Instance
.Directory().CreateOutput(Enclosing_Instance
.segment
+ ".tmp");
// NOTE(review): listing lines 86-87 are absent here (brace/keyword lines are
// dropped throughout this listing).
88 out_Renamed
.WriteBytes(bytes
, Enclosing_Instance
.MaxDoc());
// NOTE(review): listing lines 89-93 are absent. No call that closes
// out_Renamed is visible before the rename below - confirm against upstream.
94 System
.String fileName
;
// The ".f" name is chosen when the reader has no compound-file reader.
95 if (Enclosing_Instance
.cfsReader
== null)
96 fileName
= Enclosing_Instance
.segment
+ ".f" + number
;
// NOTE(review): listing lines 97-98 are absent; the assignment below is
// presumably the else-branch of the test above - confirm.
99 // use a different file name if we have compound format
100 fileName
= Enclosing_Instance
.segment
+ ".s" + number
;
102 Enclosing_Instance
.Directory().RenameFile(Enclosing_Instance
.segment
+ ".tmp", fileName
);
// NOTE(review): listing line 103 is absent; nothing visible resets 'dirty'.
// Field name -> Norm (filled by OpenNorms, read by the Norms/DoSetNorm methods).
private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

/// <summary>The class which implements SegmentReader. </summary>
private static System.Type IMPL;

/// <summary>
/// Parameterless constructor; Get(...) creates instances through
/// Activator.CreateInstance(IMPL) and then calls Init/Initialize on them.
/// </summary>
public SegmentReader() : base(null)
{
}
/// <summary>
/// Opens a reader for <paramref name="si"/> in the segment's own directory,
/// with no SegmentInfos and both closeDir and ownDir set to false.
/// </summary>
public static SegmentReader Get(SegmentInfo si)
{
    return Get(si.dir, si, null, false, false);
}
/// <summary>
/// Opens a reader for <paramref name="si"/> in the segment's own directory,
/// passing <paramref name="sis"/> and <paramref name="closeDir"/> through and
/// setting ownDir to true.
/// </summary>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    return Get(si.dir, si, sis, closeDir, true);
}
/// <summary>
/// Instantiates the configured implementation type (IMPL) and initialises it
/// for the given directory and segment.
/// </summary>
/// <exception cref="System.SystemException">
/// The implementation type could not be instantiated; the original failure is
/// available as InnerException.
/// </exception>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
    SegmentReader instance;
    try
    {
        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        // Message text is unchanged; the cause is now also attached as the
        // inner exception so the original stack trace is not lost.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }
    instance.Init(dir, sis, closeDir, ownDir);
    instance.Initialize(si);
    // NOTE(review): the listing drops its line 139; the method must return a
    // SegmentReader and 'instance' is the only candidate - confirm.
    return instance;
}
// Opens the per-segment readers and streams for the segment described by 'si'.
142 private void Initialize(SegmentInfo si
)
// NOTE(review): listing lines 143-145 are absent. No assignment to the
// 'segment' field is visible anywhere in this listing, although every line
// below reads it - confirm against upstream.
146 // Use compound file directory for some files, if it exists
147 Directory cfsDir
= Directory();
148 if (Directory().FileExists(segment
+ ".cfs"))
150 cfsReader
= new CompoundFileReader(Directory(), segment
+ ".cfs");
// NOTE(review): listing lines 151-153 are absent; as shown, cfsDir is never
// re-pointed at cfsReader - confirm against upstream.
154 // No compound file exists - use the multi-file format
155 fieldInfos
= new FieldInfos(cfsDir
, segment
+ ".fnm");
156 fieldsReader
= new FieldsReader(cfsDir
, segment
, fieldInfos
);
158 tis
= new TermInfosReader(cfsDir
, segment
, fieldInfos
);
160 // NOTE: the bitvector is stored using the regular directory, not cfs
161 if (HasDeletions(si
))
162 deletedDocs
= new BitVector(Directory(), segment
+ ".del");
164 // make sure that all index files have been read or are kept open
165 // so that if an index update removes them we'll still have them
166 freqStream
= cfsDir
.OpenInput(segment
+ ".frq");
167 proxStream
= cfsDir
.OpenInput(segment
+ ".prx");
// NOTE(review): listing lines 168-169 are absent; no call to OpenNorms is
// visible anywhere in this listing - confirm against upstream.
170 if (fieldInfos
.HasVectors())
172 // open term vector files only as needed
173 termVectorsReaderOrig
= new TermVectorsReader(cfsDir
, segment
, fieldInfos
);
// Persists pending deletions (written to "<segment>.tmp", then renamed to
// "<segment>.del"), removes the ".del" file after an undelete-all, and walks
// the cached norms.
177 protected internal override void DoCommit()
179 if (deletedDocsDirty
)
// Write to a temporary name first, then rename over the real file.
182 deletedDocs
.Write(Directory(), segment
+ ".tmp");
183 Directory().RenameFile(segment
+ ".tmp", segment
+ ".del");
185 if (undeleteAll
&& Directory().FileExists(segment
+ ".del"))
187 Directory().DeleteFile(segment
+ ".del");
// NOTE(review): listing lines 188-191 are absent; the norms loop below is
// probably guarded by a condition that is not visible - confirm.
192 System
.Collections
.IEnumerator values
= norms
.Values
.GetEnumerator();
193 while (values
.MoveNext())
195 Norm norm
= (Norm
) values
.Current
;
// NOTE(review): listing lines 196-201 are absent; what is done with each
// 'norm' is not visible here - confirm against upstream.
202 deletedDocsDirty
= false;
// NOTE(review): listing lines 203-205 are absent; resets of the other dirty
// flags, if any, are not visible.
// Releases the readers and streams held by this segment reader.
207 protected internal override void DoClose()
209 fieldsReader
.Close();
// NOTE(review): listing lines 210-211 are absent.
212 if (freqStream
!= null)
// NOTE(review): the body of this test (listing line 213) is absent; as
// printed, the next 'if' would become its body - confirm against upstream.
214 if (proxStream
!= null)
// NOTE(review): listing lines 215-218 are absent.
219 if (termVectorsReaderOrig
!= null)
220 termVectorsReaderOrig
.Close();
222 if (cfsReader
!= null)
// NOTE(review): the body of this test (listing line 223) is absent.
/// <summary>True when the segment's directory contains "&lt;name&gt;.del".</summary>
internal static bool HasDeletions(SegmentInfo si)
{
    return si.dir.FileExists(si.name + ".del");
}
/// <summary>True when this reader holds a deleted-documents bit vector.</summary>
public override bool HasDeletions()
{
    return deletedDocs != null;
}
/// <summary>True when the segment's directory contains "&lt;name&gt;.cfs".</summary>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    return si.dir.FileExists(si.name + ".cfs");
}
/// <summary>
/// Scans the segment's directory listing for a file named
/// "&lt;name&gt;.f" immediately followed by a digit.
/// </summary>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    System.String[] result = si.dir.List();
    System.String pattern = si.name + ".f";
    int patternLength = pattern.Length;
    for (int i = 0; i < result.Length; i++)
    {
        // The length test guards the indexer: a file named exactly
        // "<name>.f" previously raised IndexOutOfRangeException here.
        if (result[i].StartsWith(pattern)
            && result[i].Length > patternLength
            && System.Char.IsDigit(result[i][patternLength]))
        {
            // NOTE(review): the listing drops its line 250; 'return true'
            // is restored from the method's bool return type - confirm.
            return true;
        }
    }
    // NOTE(review): the listing drops its line 253; restored likewise.
    return false;
}
// Marks document 'docNum' as deleted, creating the bit vector on first use.
255 protected internal override void DoDelete(int docNum
)
257 if (deletedDocs
== null)
// Sized by MaxDoc().
258 deletedDocs
= new BitVector(MaxDoc());
259 deletedDocsDirty
= true;
// NOTE(review): listing line 260 is absent; a statement may be missing here.
261 deletedDocs
.Set(docNum
);
// NOTE(review): listing lines 266 and 268 are absent; only the reset of
// deletedDocsDirty is visible. Clearing deletedDocs and setting undeleteAll
// (read by DoCommit) presumably happen on the missing lines - confirm.
264 protected internal override void DoUndeleteAll()
267 deletedDocsDirty
= false;
// Builds a list from the segment's files: first the fixed extensions, then
// one norm file name per field number.
271 internal virtual System
.Collections
.ArrayList
Files()
273 System
.Collections
.ArrayList files
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(16));
274 System
.String
[] ext
= new System
.String
[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"}
;
276 for (int i
= 0; i
< ext
.Length
; i
++)
278 System
.String name
= segment
+ "." + ext
[i
];
279 if (Directory().FileExists(name
))
// NOTE(review): listing lines 280-282 are absent; the statement that adds
// 'name' to 'files' is not visible - confirm against upstream.
283 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
285 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
// NOTE(review): listing lines 286-288 are absent; 'fi' is not used by any
// visible line and the declaration of 'name' for this loop is missing.
// ".f<i>" when there is no compound-file reader, ".s<i>" otherwise.
289 if (cfsReader
== null)
290 name
= segment
+ ".f" + i
;
// NOTE(review): listing line 291 is absent (presumably 'else').
292 name
= segment
+ ".s" + i
;
293 if (Directory().FileExists(name
))
// NOTE(review): listing lines 294-298 are absent, including the add to
// 'files' and the return statement.
// NOTE(review): the body (listing lines 301-303) is absent from this listing;
// only the signature is visible.
300 public override TermEnum
Terms()
// NOTE(review): the body (listing lines 306-308) is absent from this listing;
// only the signature is visible.
305 public override TermEnum
Terms(Term t
)
// Returns the stored fields of document 'n' via fieldsReader.Doc(n).
310 public override Document
Document(int n
)
// NOTE(review): listing lines 311-314 are absent; the condition guarding the
// throw below (presumably a deleted-document check) is not visible - confirm.
315 throw new System
.ArgumentException("attempt to access a deleted document");
316 return fieldsReader
.Doc(n
);
// True when a deleted-documents vector exists and has bit 'n' set.
320 public override bool IsDeleted(int n
)
// NOTE(review): listing lines 321-323 are absent; more than an opening brace
// is missing, so any synchronisation wrapper is not visible - confirm.
324 return (deletedDocs
!= null && deletedDocs
.Get(n
));
/// <summary>Creates a SegmentTermDocs bound to this reader.</summary>
public override TermDocs TermDocs()
{
    return new SegmentTermDocs(this);
}
/// <summary>Creates a SegmentTermPositions bound to this reader.</summary>
public override TermPositions TermPositions()
{
    return new SegmentTermPositions(this);
}
// Looks up the TermInfo for 't' in the term dictionary reader.
338 public override int DocFreq(Term t
)
340 TermInfo ti
= tis
.Get(t
);
// NOTE(review): listing lines 341-345 are absent; how the result is derived
// from 'ti' (and what is returned when it is null) is not visible.
// Subtracts the number of deleted documents from 'n'.
347 public override int NumDocs()
// NOTE(review): listing line 349, which must declare and initialise 'n', is
// absent - confirm its initial value against upstream.
350 if (deletedDocs
!= null)
351 n
-= deletedDocs
.Count();
// NOTE(review): listing line 352 (presumably the return of 'n') is absent.
/// <summary>Returns fieldsReader.Size().</summary>
public override int MaxDoc()
{
    return fieldsReader.Size();
}
/// <summary>Collects the name of every field known to this segment.</summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
public override System.Collections.ICollection GetFieldNames()
{
    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        fieldSet.Add(fi.name, fi.name);
    }
    // NOTE(review): the listing drops its line 373. Returning the table
    // itself is restored by inference; confirm it was not e.g. fieldSet.Keys.
    return fieldSet;
}
/// <summary>
/// Collects the names of the fields whose isIndexed flag equals
/// <paramref name="indexed"/>.
/// </summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed == indexed)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    // NOTE(review): the listing drops its line 390. Returning the table
    // itself is restored by inference; confirm it was not e.g. fieldSet.Keys.
    return fieldSet;
}
/// <summary>
/// Collects the names of the indexed fields whose three term-vector flags
/// match exactly the combination selected by <paramref name="tvSpec"/>.
/// </summary>
/// <remarks>Deprecated: replaced by GetFieldNames(IndexReader.FieldOption).</remarks>
/// <exception cref="System.ArgumentException">
/// <paramref name="tvSpec"/> is not one of the known Field.TermVector values.
/// </exception>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    bool storedTermVector;
    bool storePositionWithTermVector;
    bool storeOffsetWithTermVector;

    if (tvSpec == Field.TermVector.NO)
    {
        storedTermVector = false;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.YES)
    {
        storedTermVector = true;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.WITH_POSITIONS)
    {
        storedTermVector = true;
        storePositionWithTermVector = true;
        storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.WITH_OFFSETS)
    {
        storedTermVector = true;
        storePositionWithTermVector = false;
        storeOffsetWithTermVector = true;
    }
    else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
    {
        storedTermVector = true;
        storePositionWithTermVector = true;
        storeOffsetWithTermVector = true;
    }
    else
    {
        // The 'else' is not printed in the listing but is required: the three
        // locals above would otherwise not be definitely assigned.
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
    }

    // maintain a unique set of Field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed
            && fi.storeTermVector == storedTermVector
            && fi.storePositionWithTermVector == storePositionWithTermVector
            && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    // NOTE(review): the listing drops its line 448. Returning the table
    // itself is restored by inference; confirm it was not e.g. fieldSet.Keys.
    return fieldSet;
}
/// <summary>
/// Collects the names of the fields that satisfy
/// <paramref name="fieldOption"/>.
/// </summary>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
{
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);

        // Every branch of the original if/else-if chain performed the same
        // Add, so the chain is folded into one condition. The tests keep
        // their original order.
        bool selected =
            fieldOption == IndexReader.FieldOption.ALL
            || (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
            || (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
            || (fi.isIndexed && !fi.storeTermVector
                && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
            || (fi.storeTermVector && !fi.storePositionWithTermVector && !fi.storeOffsetWithTermVector
                && fieldOption == IndexReader.FieldOption.TERMVECTOR)
            || (fi.isIndexed && fi.storeTermVector
                && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
            || (fi.storePositionWithTermVector && !fi.storeOffsetWithTermVector
                && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
            || (fi.storeOffsetWithTermVector && !fi.storePositionWithTermVector
                && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
            || (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector
                && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);

        if (selected)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    // NOTE(review): the listing drops its line 496. Returning the table
    // itself is restored by inference; confirm it was not e.g. fieldSet.Keys.
    return fieldSet;
}
// Returns the norm bytes for 'field', reading them on first request and
// caching them on the field's Norm entry.
499 /// <seealso cref="fldOption)">
501 public override byte[] Norms(System
.String field
)
// NOTE(review): listing lines 502-504 are absent; any synchronisation
// wrapper is not visible.
505 Norm norm
= (Norm
) norms
[field
];
// NOTE(review): listing lines 506 and 508 are absent; the null test on
// 'norm' and what it returns are not visible - confirm against upstream.
507 // not an indexed Field
509 if (norm
.bytes
== null)
511 // value not yet read
512 byte[] bytes
= new byte[MaxDoc()];
// Fill the new buffer through the three-argument overload.
513 Norms(field
, bytes
, 0);
514 norm
.bytes
= bytes
; // cache it
// NOTE(review): listing lines 515-518 are absent, including the return.
// Stores 'value_Renamed' as the norm of document 'doc' for 'field' and marks
// that field's Norm entry dirty.
520 protected internal override void DoSetNorm(int doc
, System
.String field
, byte value_Renamed
)
522 Norm norm
= (Norm
) norms
[field
];
// NOTE(review): listing lines 523 and 525 are absent; the null test on
// 'norm' that the comment below refers to is not visible.
524 // not an indexed Field
526 norm
.dirty
= true; // mark it dirty
// NOTE(review): listing lines 527-528 are absent; nothing visible sets the
// reader-level normsDirty flag - confirm against upstream.
529 Norms(field
)[doc
] = value_Renamed
; // set the value
// Copies MaxDoc() norm bytes for 'field' into 'bytes' starting at 'offset',
// from the cached array when present, otherwise from a clone of the norm input.
532 /// <summary>Read norms into a pre-allocated array. </summary>
533 public override void Norms(System
.String field
, byte[] bytes
, int offset
)
// NOTE(review): listing lines 534-537 are absent.
538 Norm norm
= (Norm
) norms
[field
];
// NOTE(review): listing line 539 is absent; the condition guarding the early
// return below (presumably a null test on 'norm') is not visible.
540 return ; // use zeros in array
542 if (norm
.bytes
!= null)
544 // can copy from cache
545 Array
.Copy(norm
.bytes
, 0, bytes
, offset
, MaxDoc());
// NOTE(review): listing lines 546-548 are absent; presumably the method
// returns here after the cache copy - confirm.
// The stream is cloned before reading.
549 IndexInput normStream
= (IndexInput
) norm
.in_Renamed
.Clone();
// NOTE(review): listing lines 550-553 are absent; any positioning of
// normStream before the read is not visible.
554 normStream
.ReadBytes(bytes
, offset
, MaxDoc());
// NOTE(review): listing lines 555-561 are absent; no call closing normStream
// is visible.
// Registers a Norm entry in 'norms' for fields of this segment, opening each
// field's norm file ("<segment>.s<number>" if present, else "<segment>.f<number>").
563 private void OpenNorms(Directory cfsDir
)
565 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
567 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
// NOTE(review): listing lines 568-569 are absent; any filter on 'fi' (e.g.
// indexed fields only) is not visible.
570 // look first if there are separate norms in compound format
571 System
.String fileName
= segment
+ ".s" + fi
.number
;
572 Directory d
= Directory();
573 if (!d
.FileExists(fileName
))
575 fileName
= segment
+ ".f" + fi
.number
;
// NOTE(review): listing lines 576-577 are absent; the 'cfsDir' parameter is
// not used by any visible line - confirm whether 'd' is switched to it here.
578 norms
[fi
.name
] = new Norm(this, d
.OpenInput(fileName
), fi
.number
);
/// <summary>
/// Closes the input of every cached Norm while holding the norms table's
/// SyncRoot.
/// </summary>
private void CloseNorms()
{
    lock (norms.SyncRoot)
    {
        foreach (Norm norm in norms.Values)
        {
            norm.in_Renamed.Close();
        }
    }
}
/// <summary>
/// Returns this reader's working TermVectorsReader, cloning it from
/// termVectorsReaderOrig on first use and caching it in the tvReader field.
/// </summary>
/// <returns> TermVectorsReader </returns>
private TermVectorsReader GetTermVectorsReader()
{
    // The clone is cached in a plain instance field. The earlier comment
    // described thread-local storage, which this code does not use.
    if (tvReader == null)
    {
        tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
    }
    // NOTE(review): the listing drops its line 605; returning the cached
    // clone is restored from the method's return type - confirm.
    return tvReader;
}
/// <summary>Return a term frequency vector for the specified document and Field. The
/// vector returned contains term numbers and frequencies for all terms in
/// the specified Field of this document, if the Field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
/// <throws> IOException </throws>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Check if this Field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.FieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
    {
        // NOTE(review): the listing drops its line 619; 'return null' follows
        // from the summary above ("the method returns null").
        return null;
    }

    TermVectorsReader termVectorsReader = GetTermVectorsReader();
    if (termVectorsReader == null)
    {
        // NOTE(review): the listing drops its line 623; restored by analogy
        // with the guard above - confirm.
        return null;
    }

    return termVectorsReader.Get(docNumber, field);
}
/// <summary>Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized Field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized Field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <throws> IOException </throws>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
    if (termVectorsReaderOrig == null)
    {
        // NOTE(review): the listing drops its line 639; 'return null' follows
        // from the summary above ("the method returns null").
        return null;
    }

    TermVectorsReader termVectorsReader = GetTermVectorsReader();
    if (termVectorsReader == null)
    {
        // NOTE(review): the listing drops its line 643; restored by analogy
        // with the guard above - confirm.
        return null;
    }

    return termVectorsReader.Get(docNumber);
}
/// <summary>
/// Resolves the implementation type stored in IMPL from the
/// "Lucene.Net.SegmentReader.class" application setting; this class's own
/// full name is passed as the second argument (presumably the fallback when
/// the setting is absent - confirm against SupportClass.AppSettings).
/// </summary>
static SegmentReader()
{
    try
    {
        System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
        IMPL = System.Type.GetType(name);
    }
    catch (System.Exception e)
    {
        // Message text is unchanged; the cause is now also attached as the
        // inner exception so the original stack trace is not lost.
        throw new System.SystemException("cannot load SegmentReader class: " + e.ToString(), e);
    }
}