2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Directory
= Lucene
.Net
.Store
.Directory
;
19 using InputStream
= Lucene
.Net
.Store
.InputStream
;
20 using OutputStream
= Lucene
.Net
.Store
.OutputStream
;
21 using BitVector
= Lucene
.Net
.Util
.BitVector
;
22 namespace Lucene
.Net
.Index
25 /// <summary> FIXME: Describe class <code>SegmentReader</code> here.
28 /// <version> $Id: SegmentReader.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $
30 sealed public class SegmentReader
: IndexReader
// Prefix of every per-segment file name built in this class (segment + ".cfs", ".del", ".frq", ...).
32 private System
.String segment
;
// Field metadata, created in Initialize from segment + ".fnm".
34 internal FieldInfos fieldInfos
;
// Stored-fields reader; MaxDoc() returns fieldsReader.Size() and Document(n) returns fieldsReader.Doc(n).
35 private FieldsReader fieldsReader
;
// Term dictionary reader, queried by DocFreq through tis.Get(t).
37 internal TermInfosReader tis
;
// Term vector reader; assigned in Initialize under the fieldInfos.HasVectors() check.
38 internal TermVectorsReader termVectorsReader
;
// Deletion bits; HasDeletions() reports true exactly when this is non-null.
40 internal BitVector deletedDocs
= null;
// Set by DoDelete, read and cleared by DoCommit.
41 private bool deletedDocsDirty
= false;
// NOTE(review): no read or write of normsDirty is visible in this extract - confirm its use.
42 private bool normsDirty
= false;
// Read by DoCommit to decide whether segment + ".del" should be removed.
43 private bool undeleteAll
= false;
// Streams over segment + ".frq" and segment + ".prx", opened in Initialize.
45 internal InputStream freqStream
;
46 internal InputStream proxStream
;
48 // Compound File Reader when based on a compound file segment
49 internal CompoundFileReader cfsReader
;
// Stores the owning SegmentReader so it can later be reached through Enclosing_Instance.
// Called from the Norm constructor.
53 private void InitBlock(SegmentReader enclosingInstance
)
55 this.enclosingInstance
= enclosingInstance
;
// Back-reference to the owning SegmentReader, assigned by InitBlock.
57 private SegmentReader enclosingInstance
;
// Accessor returning the owning SegmentReader.
58 public SegmentReader Enclosing_Instance
62 return enclosingInstance
;
// Creates a Norm bound to the given reader (via InitBlock) and norm input stream.
// NOTE(review): no assignment of the `number` argument is visible in this extract -
// confirm that the full file stores it in the `number` field.
66 public Norm(SegmentReader enclosingInstance
, InputStream in_Renamed
, int number
)
68 InitBlock(enclosingInstance
);
69 this.in_Renamed
= in_Renamed
;
// Stream the norm bytes are read from; cloned in Norms(field, bytes, offset) and closed in CloseNorms.
73 public InputStream in_Renamed
; // private -> public
// Cached norm bytes; null until Norms(field) loads and stores them.
74 public byte[] bytes
; // private -> public
// Set to true by DoSetNorm when a norm value is changed.
75 public bool dirty
; // private -> public
// Numeric suffix of the norm file name built in ReWrite (segment + ".f" + number).
76 public int number
; // private -> public
// Re-writes this norm: creates segment + ".tmp" in the enclosing reader's Directory(),
// writes MaxDoc() bytes from `bytes` to it, then renames that file to segment + ".f" + number.
// NOTE(review): closing of out_Renamed and any reset of `dirty` are not visible in this
// extract - confirm against the full file.
78 public void ReWrite() // private -> public
80 // NOTE: norms are re-written in regular directory, not cfs
81 OutputStream out_Renamed
= Enclosing_Instance
.Directory().CreateFile(Enclosing_Instance
.segment
+ ".tmp");
84 out_Renamed
.WriteBytes(bytes
, Enclosing_Instance
.MaxDoc());
90 System
.String fileName
= Enclosing_Instance
.segment
+ ".f" + number
;
91 Enclosing_Instance
.Directory().RenameFile(Enclosing_Instance
.segment
+ ".tmp", fileName
);
// Field name -> Norm map, wrapped with Hashtable.Synchronized; entries are added in OpenNorms
// and enumerated in DoCommit and CloseNorms.
96 private System
.Collections
.Hashtable norms
= System
.Collections
.Hashtable
.Synchronized(new System
.Collections
.Hashtable());
// Constructs a reader for segment `si`, forwarding si.dir, `sis` and `closeDir` to the IndexReader base constructor.
// NOTE(review): the constructor body is not visible in this extract.
98 public /*internal*/ SegmentReader(SegmentInfos sis
, SegmentInfo si
, bool closeDir
) : base(si
.dir
, sis
, closeDir
)
// Constructs a reader for segment `si`, forwarding only si.dir to the IndexReader base constructor.
// NOTE(review): the constructor body is not visible in this extract.
103 public /*internal*/ SegmentReader(SegmentInfo si
) : base(si
.dir
)
// Opens the files of segment `si`. When segment + ".cfs" exists in Directory() a
// CompoundFileReader is created over it. Field infos, stored fields, term infos and the
// ".frq"/".prx" streams are opened through cfsDir; the deletion bit vector is read from
// Directory() when HasDeletions(si) is true; a TermVectorsReader is created under the
// fieldInfos.HasVectors() check.
// NOTE(review): the assignment of `segment`, any re-pointing of cfsDir at cfsReader and a
// call to OpenNorms are not visible in this extract - confirm against the full file.
108 private void Initialize(SegmentInfo si
)
112 // Use compound file directory for some files, if it exists
113 Directory cfsDir
= Directory();
114 if (Directory().FileExists(segment
+ ".cfs"))
116 cfsReader
= new CompoundFileReader(Directory(), segment
+ ".cfs");
120 // No compound file exists - use the multi-file format
121 fieldInfos
= new FieldInfos(cfsDir
, segment
+ ".fnm");
122 fieldsReader
= new FieldsReader(cfsDir
, segment
, fieldInfos
);
124 tis
= new TermInfosReader(cfsDir
, segment
, fieldInfos
);
126 // NOTE: the bitvector is stored using the regular directory, not cfs
127 if (HasDeletions(si
))
128 deletedDocs
= new BitVector(Directory(), segment
+ ".del");
130 // make sure that all index files have been read or are kept open
131 // so that if an index update removes them we'll still have them
132 freqStream
= cfsDir
.OpenFile(segment
+ ".frq");
133 proxStream
= cfsDir
.OpenFile(segment
+ ".prx");
136 if (fieldInfos
.HasVectors())
138 // open term vector files only as needed
139 termVectorsReader
= new TermVectorsReader(cfsDir
, segment
, fieldInfos
);
// Persists pending changes. When deletedDocsDirty is set, the deletion bits are written to
// segment + ".tmp" and that file is renamed to segment + ".del". When undeleteAll is set and
// segment + ".del" exists, that file is deleted. The Norm entries are then enumerated and
// deletedDocsDirty is cleared.
// NOTE(review): what is done with each Norm inside the loop, and any reset of the other
// flags, is not visible in this extract.
143 protected internal override void DoCommit()
145 if (deletedDocsDirty
)
148 deletedDocs
.Write(Directory(), segment
+ ".tmp");
149 Directory().RenameFile(segment
+ ".tmp", segment
+ ".del");
151 if (undeleteAll
&& Directory().FileExists(segment
+ ".del"))
153 Directory().DeleteFile(segment
+ ".del");
158 System
.Collections
.IEnumerator values
= norms
.Values
.GetEnumerator();
159 while (values
.MoveNext())
161 Norm norm
= (Norm
) values
.Current
;
168 deletedDocsDirty
= false;
// Releases the resources held by this reader: closes fieldsReader and, when it is non-null,
// termVectorsReader. freqStream, proxStream and cfsReader are null-checked as well.
// NOTE(review): the statements guarded by those three checks are not visible in this
// extract - presumably Close() calls; confirm.
173 protected internal override void DoClose()
175 fieldsReader
.Close();
178 if (freqStream
!= null)
180 if (proxStream
!= null)
184 if (termVectorsReader
!= null)
185 termVectorsReader
.Close();
187 if (cfsReader
!= null)
/// <summary>Returns true when the directory of si contains the file si.name + ".del".</summary>
191 internal static bool HasDeletions(SegmentInfo si
)
193 return si
.dir
.FileExists(si
.name
+ ".del");
/// <summary>Returns true when this reader holds a deletion bit vector (deletedDocs is non-null).</summary>
196 public override bool HasDeletions()
198 return deletedDocs
!= null;
/// <summary>Returns true when the directory of si contains the file si.name + ".cfs".</summary>
202 internal static bool UsesCompoundFile(SegmentInfo si
)
204 return si
.dir
.FileExists(si
.name
+ ".cfs");
/// <summary>
/// Returns true when the directory of <c>si</c> lists at least one separate norm file,
/// i.e. a file whose name is <c>si.name + ".f"</c> immediately followed by a digit.
/// </summary>
/// <param name="si">Segment whose directory listing is scanned.</param>
/// <returns>true if such a file is listed; otherwise false.</returns>
internal static bool HasSeparateNorms(SegmentInfo si)
{
	System.String[] result = si.dir.List();
	System.String pattern = si.name + ".f";
	int patternLength = pattern.Length;
	// Scan every entry of the listing; the loop bound must be the array length.
	for (int i = 0; i < result.Length; i++)
	{
		System.String candidate = result[i];
		// The length check keeps the indexer in range for a file named exactly like the pattern.
		if (candidate.Length > patternLength && candidate.StartsWith(pattern) && System.Char.IsDigit(candidate[patternLength]))
			return true;
	}
	return false;
}
/// <summary>Marks document docNum as deleted: allocates the bit vector (sized MaxDoc()) on first use,
/// flags the deletions as dirty and sets the bit for docNum.</summary>
220 protected internal override void DoDelete(int docNum
)
222 if (deletedDocs
== null)
223 deletedDocs
= new BitVector(MaxDoc());
224 deletedDocsDirty
= true;
226 deletedDocs
.Set(docNum
);
// Undeletes all documents. Only the reset of deletedDocsDirty is visible in this extract.
// NOTE(review): presumably deletedDocs is also cleared and undeleteAll set - confirm
// against the full file.
229 protected internal override void DoUndeleteAll()
232 deletedDocsDirty
= false;
// Builds, in a synchronized ArrayList, the names of the files belonging to this segment.
// First loop: segment + "." + ext for each known extension, checked with
// Directory().FileExists(name). Second loop: segment + ".f" + i per field info.
// NOTE(review): the Add call of the first loop, any condition guarding the ".f" entries
// (`fi` is fetched but no use of it is visible) and the return statement are not visible
// in this extract.
236 internal System
.Collections
.ArrayList
Files()
238 System
.Collections
.ArrayList files
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(16));
239 System
.String
[] ext
= new System
.String
[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"}
;
241 for (int i
= 0; i
< ext
.Length
; i
++)
243 System
.String name
= segment
+ "." + ext
[i
];
244 if (Directory().FileExists(name
))
248 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
250 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
252 files
.Add(segment
+ ".f" + i
);
// Returns a TermEnum for this segment.
// NOTE(review): the method body is not visible in this extract.
257 public override TermEnum
Terms()
// Returns a TermEnum for this segment, given a Term t.
// NOTE(review): the method body is not visible in this extract, so how t is used
// (e.g. as a starting position) cannot be confirmed here.
262 public override TermEnum
Terms(Term t
)
// Returns the stored fields of document n via fieldsReader.Doc(n).
// Throws System.ArgumentException with "attempt to access a deleted document".
// NOTE(review): the condition guarding the throw is not visible in this extract -
// presumably IsDeleted(n); confirm.
267 public override Document
Document(int n
)
272 throw new System
.ArgumentException("attempt to access a deleted document");
273 return fieldsReader
.Doc(n
);
/// <summary>Returns true when a deletion bit vector exists and its bit for document n is set.</summary>
277 public override bool IsDeleted(int n
)
281 return (deletedDocs
!= null && deletedDocs
.Get(n
));
/// <summary>Returns a new SegmentTermDocs constructed over this reader.</summary>
285 public override TermDocs
TermDocs()
287 return new SegmentTermDocs(this);
/// <summary>Returns a new SegmentTermPositions constructed over this reader.</summary>
290 public override TermPositions
TermPositions()
292 return new SegmentTermPositions(this);
// Looks up term t in the term dictionary through tis.Get(t).
// NOTE(review): the return statements are not visible in this extract - presumably the
// TermInfo's document frequency, or 0 when the lookup yields nothing; confirm.
295 public override int DocFreq(Term t
)
297 TermInfo ti
= tis
.Get(t
);
// Computes the document count: when a deletion bit vector exists, deletedDocs.Count()
// is subtracted from n.
// NOTE(review): the declaration/initialisation of n and the return statement are not
// visible in this extract - presumably n starts at MaxDoc(); confirm.
304 public override int NumDocs()
307 if (deletedDocs
!= null)
308 n
-= deletedDocs
.Count();
/// <summary>Returns fieldsReader.Size(), used throughout this class as the length of per-document arrays.</summary>
312 public override int MaxDoc()
314 return fieldsReader
.Size();
// Adds the name of every entry in fieldInfos to a Hashtable keyed (and valued) by that name.
// NOTE(review): the return statement is not visible in this extract, so which view of
// the table is returned cannot be confirmed here.
317 /// <seealso cref="IndexReader.GetFieldNames()"/>
319 public override System
.Collections
.ICollection
GetFieldNames()
321 // maintain a unique set of Field names
322 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
323 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
325 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
326 fieldSet
.Add(fi
.name
, fi
.name
);
// Adds to a Hashtable the names of the fields whose isIndexed flag equals `indexed`.
// NOTE(review): the return statement is not visible in this extract.
331 /// <seealso cref="IndexReader.GetFieldNames(bool)"/>
333 public override System
.Collections
.ICollection
GetFieldNames(bool indexed
)
335 // maintain a unique set of Field names
336 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
337 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
339 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
340 if (fi
.isIndexed
== indexed
)
341 fieldSet
.Add(fi
.name
, fi
.name
);
// NOTE(review): the return statement is not visible in this extract.
346 /// <summary> Collects the names of indexed fields whose storeTermVector flag equals storedTermVector. </summary>
347 /// <param name="storedTermVector">if true, returns only Indexed fields that have term vector info,
348 /// else only indexed fields without term vector info
350 /// <returns> Collection of Strings indicating the names of the fields
352 public override System
.Collections
.ICollection
GetIndexedFieldNames(bool storedTermVector
)
354 // maintain a unique set of Field names
355 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
356 for (int ii
= 0; ii
< fieldInfos
.Size(); ii
++)
358 FieldInfo fi
= fieldInfos
.FieldInfo(ii
);
359 if (fi
.isIndexed
== true && fi
.storeTermVector
== storedTermVector
)
361 fieldSet
.Add(fi
.name
, fi
.name
);
// Returns the norm bytes of `field`, loading them on first use: when norm.bytes is null a
// MaxDoc()-sized array is filled through Norms(field, bytes, 0) and cached in norm.bytes.
// NOTE(review): the handling of a missing Norm entry (the "not an indexed Field" case)
// and the return statements are not visible in this extract.
367 public override byte[] Norms(System
.String field
)
371 Norm norm
= (Norm
) norms
[field
];
373 // not an indexed Field
375 if (norm
.bytes
== null)
377 // value not yet read
378 byte[] bytes
= new byte[MaxDoc()];
379 Norms(field
, bytes
, 0);
380 norm
.bytes
= bytes
; // cache it
// Sets the norm of document `doc` for `field`: marks the field's Norm dirty and stores
// value_Renamed at index `doc` of the array returned by Norms(field).
// NOTE(review): the check behind the "not an indexed Field" comment, and any update of
// normsDirty, are not visible in this extract.
386 protected internal override void DoSetNorm(int doc
, System
.String field
, byte value_Renamed
)
388 Norm norm
= (Norm
) norms
[field
];
390 // not an indexed Field
392 norm
.dirty
= true; // mark it dirty
395 Norms(field
)[doc
] = value_Renamed
; // set the value
// When the field's Norm already has cached bytes, MaxDoc() of them are copied into
// `bytes` starting at `offset`; otherwise a clone of the Norm's input stream is read for
// MaxDoc() bytes into `bytes` at `offset`.
// NOTE(review): the condition guarding the early `return` (presumably a missing Norm),
// the return after the cache copy, and the positioning/closing of normStream are not
// visible in this extract.
398 /// <summary>Read norms into a pre-allocated array. </summary>
399 public override void Norms(System
.String field
, byte[] bytes
, int offset
)
404 Norm norm
= (Norm
) norms
[field
];
406 return ; // use zeros in array
408 if (norm
.bytes
!= null)
410 // can copy from cache
411 Array
.Copy(norm
.bytes
, 0, bytes
, offset
, MaxDoc());
415 InputStream normStream
= (InputStream
) norm
.in_Renamed
.Clone();
420 normStream
.ReadBytes(bytes
, offset
, MaxDoc());
// For each field info, opens the norm file segment + ".f" + fi.number - from Directory()
// when the file exists there, otherwise from cfsDir - and registers a Norm for it under
// fi.name in `norms`.
// NOTE(review): any per-field condition between fetching `fi` and opening the file is
// not visible in this extract.
429 private void OpenNorms(Directory cfsDir
)
431 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
433 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
436 System
.String fileName
= segment
+ ".f" + fi
.number
;
437 // look first for re-written file, then in compound format
438 Directory d
= Directory().FileExists(fileName
)?Directory():cfsDir
;
439 norms
[fi
.name
] = new Norm(this, d
.OpenFile(fileName
), fi
.number
);
/// <summary>Closes the input stream of every Norm in the norms table while holding norms.SyncRoot.</summary>
444 private void CloseNorms()
446 lock (norms
.SyncRoot
)
448 System
.Collections
.IEnumerator enumerator
= norms
.Values
.GetEnumerator();
449 while (enumerator
.MoveNext())
451 Norm norm
= (Norm
) enumerator
.Current
;
452 norm
.in_Renamed
.Close();
// NOTE(review): the statement guarded by the `fi == null || !fi.storeTermVector` check is
// not visible in this extract - presumably `return null`, as the summary describes.
457 /// <summary>Return a term frequency vector for the specified document and Field. The
458 /// vector returned contains term numbers and frequencies for all terms in
459 /// the specified Field of this document, if the Field had storeTermVector
460 /// flag set. If the flag was not set, the method returns null.
462 public override TermFreqVector
GetTermFreqVector(int docNumber
, System
.String field
)
464 // Check if this Field is invalid or has no stored term vector
465 FieldInfo fi
= fieldInfos
.FieldInfo(field
);
466 if (fi
== null || !fi
.storeTermVector
)
469 return termVectorsReader
.Get(docNumber
, field
);
// NOTE(review): the statement guarded by the `termVectorsReader == null` check is not
// visible in this extract - presumably `return null`, as the summary describes.
473 /// <summary>Return an array of term frequency vectors for the specified document.
474 /// The array contains a vector for each vectorized Field in the document.
475 /// Each vector contains term numbers and frequencies for all terms
476 /// in a given vectorized Field.
477 /// If no such fields existed, the method returns null.
479 public override TermFreqVector
[] GetTermFreqVectors(int docNumber
)
481 if (termVectorsReader
== null)
484 return termVectorsReader
.Get(docNumber
);