/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Field
= Lucene
.Net
.Documents
.Field
;
19 using Similarity
= Lucene
.Net
.Search
.Similarity
;
20 using Directory
= Lucene
.Net
.Store
.Directory
;
21 using FSDirectory
= Lucene
.Net
.Store
.FSDirectory
;
22 using Lock
= Lucene
.Net
.Store
.Lock
;
23 namespace Lucene
.Net
.Index
/// <summary>IndexReader is an abstract class, providing an interface for accessing an
/// index. Search of an index is done entirely through this abstract interface,
/// so that any subclass which implements it is searchable.
/// <p> Concrete subclasses of IndexReader are usually constructed with a call to
/// one of the static <c>Open</c> methods.</p>
/// <p> For efficiency, in this API documents are often referred to via
/// <i>document numbers</i>, non-negative integers which each name a unique
/// document in the index. These document numbers are ephemeral -- they may change
/// as documents are added to and deleted from an index. Clients should thus not
/// rely on a given document having the same number between sessions.</p>
/// </summary>
/// <author> Doug Cutting </author>
/// <version> $Id: IndexReader.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $ </version>
41 public abstract class IndexReader
/// <summary>
/// Lock-guarded action run by <c>Open</c>: reads the segment list from the
/// directory and builds the reader that matches it.
/// </summary>
private class AnonymousClassWith : Lock.With
{
    private Lucene.Net.Store.Directory directory;
    private bool closeDirectory;

    /// <param name="directory">Directory the segment list is read from.</param>
    /// <param name="closeDirectory">Passed through to the reader that is created.</param>
    /// <param name="Param1">Lock handed to the <c>Lock.With</c> base class.</param>
    /// <param name="Param2">Second argument of the <c>Lock.With</c> base constructor
    /// (callers pass a lock timeout constant).</param>
    internal AnonymousClassWith(Lucene.Net.Store.Directory directory, bool closeDirectory, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
    {
        InitBlock(directory, closeDirectory);
    }

    private void InitBlock(Lucene.Net.Store.Directory directory, bool closeDirectory)
    {
        this.directory = directory;
        this.closeDirectory = closeDirectory;
    }

    /// <summary>
    /// Reads the segment infos and returns a <c>SegmentReader</c> for a
    /// single-segment index, or a <c>MultiReader</c> over one
    /// <c>SegmentReader</c> per segment otherwise.
    /// </summary>
    public override System.Object DoBody()
    {
        SegmentInfos infos = new SegmentInfos();
        infos.Read(directory);

        // Only a one-segment index can be served by a single SegmentReader;
        // without this check the multi-segment path below is unreachable.
        if (infos.Count == 1)
        {
            return new SegmentReader(infos, infos.Info(0), closeDirectory);
        }

        IndexReader[] readers = new IndexReader[infos.Count];
        for (int i = 0; i < infos.Count; i++)
        {
            readers[i] = new SegmentReader(infos.Info(i));
        }
        return new MultiReader(directory, infos, closeDirectory, readers);
    }
}
/// <summary>
/// Lock-guarded action run by <c>Commit</c>: lets the enclosing reader commit
/// its changes and then rewrites the segment infos.
/// </summary>
private class AnonymousClassWith1 : Lock.With
{
    private IndexReader enclosingInstance;

    /// <param name="enclosingInstance">Reader whose changes are committed.</param>
    /// <param name="Param1">Lock handed to the <c>Lock.With</c> base class.</param>
    /// <param name="Param2">Second argument of the <c>Lock.With</c> base constructor
    /// (callers pass a lock timeout constant).</param>
    internal AnonymousClassWith1(IndexReader enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
    {
        InitBlock(enclosingInstance);
    }

    private void InitBlock(IndexReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The reader this action operates on.</summary>
    public IndexReader Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>
    /// Calls <c>DoCommit</c> on the enclosing reader, then writes its segment
    /// infos to its directory.
    /// </summary>
    /// <returns>Always <c>null</c>; the action is run only for its side effects.</returns>
    public override System.Object DoBody()
    {
        Enclosing_Instance.DoCommit();
        Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory);
        return null;
    }
}
/// <summary> Constructor used if IndexReader is not owner of its directory.
/// This is used for IndexReaders that are used within other IndexReaders that
/// take care of locking directories.
/// </summary>
/// <param name="directory">Directory where IndexReader files reside.</param>
protected internal IndexReader(Directory directory)
{
    // A non-owning reader neither locks nor closes the directory.
    this.directoryOwner = false;
    this.closeDirectory = false;
    this.directory = directory;
}
/// <summary> Constructor used if IndexReader is owner of its directory.
/// If IndexReader is owner of its directory, it locks its directory in case of
/// write operations.
/// </summary>
/// <param name="directory">Directory where IndexReader files reside.</param>
/// <param name="segmentInfos">Segment infos kept by this reader; their version is
/// compared against the directory's current version when the write lock is taken.</param>
/// <param name="closeDirectory">Stored in the <c>closeDirectory</c> field.</param>
internal IndexReader(Directory directory, SegmentInfos segmentInfos, bool closeDirectory)
{
    this.directoryOwner = true;
    this.closeDirectory = closeDirectory;
    this.segmentInfos = segmentInfos;
    this.directory = directory;
}
// Directory holding the index files; set by both constructors.
private Directory directory;

// Segment infos; only the owning constructor assigns them.
private SegmentInfos segmentInfos;

// Write lock held by this reader, or null when none is held.
private Lock writeLock;

// True when this reader was built by the owning constructor.
private bool directoryOwner;

// Value of the closeDirectory constructor argument (false for non-owning readers).
private bool closeDirectory;

// NOTE(review): presumably marks uncommitted delete/undelete/setNorm changes -- confirm against the mutators.
private bool hasChanges;
/// <summary>Returns an IndexReader reading the index in an FSDirectory in the
/// named path.
/// </summary>
/// <param name="path">Path handed to <c>FSDirectory.GetDirectory</c>.</param>
public static IndexReader Open(System.String path)
{
    Directory indexDirectory = FSDirectory.GetDirectory(path, false);
    return Open(indexDirectory, true);
}
/// <summary>Returns an IndexReader reading the index in an FSDirectory in the
/// named path.
/// </summary>
/// <param name="path">Location handed to <c>FSDirectory.GetDirectory</c>.</param>
public static IndexReader Open(System.IO.FileInfo path)
{
    Directory indexDirectory = FSDirectory.GetDirectory(path, false);
    return Open(indexDirectory, true);
}
/// <summary>Returns an IndexReader reading the index in the given Directory.</summary>
/// <param name="directory">Directory containing the index.</param>
public static IndexReader Open(Directory directory)
{
    // The caller supplied the directory, so the reader is told not to close it.
    const bool closeDirectory = false;
    return Open(directory, closeDirectory);
}
/// <summary>
/// Opens a reader on <paramref name="directory"/> while holding the index
/// commit lock.
/// </summary>
/// <param name="directory">Directory containing the index.</param>
/// <param name="closeDirectory">Passed through to the reader that is created.</param>
private static IndexReader Open(Directory directory, bool closeDirectory)
{
    // in- & inter-process sync: the monitor covers threads in this process,
    // the commit lock covers other processes.
    lock (directory)
    {
        Lock commitLock = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
        return (IndexReader) new AnonymousClassWith(directory, closeDirectory, commitLock, IndexWriter.COMMIT_LOCK_TIMEOUT).run();
    }
}
/// <summary>Returns the directory this index resides in.</summary>
public virtual Directory Directory()
{
    return directory;
}
/// <summary> Returns the time the index in the named directory was last modified.
/// <p>Synchronization of IndexReader and IndexWriter instances is
/// no longer done via time stamps of the segments file since the time resolution
/// depends on the hardware platform. Instead, a version number is maintained
/// within the segments file, which is incremented every time the index is
/// changed.</p>
/// Deprecated: replaced by <c>GetCurrentVersion(String)</c>.
/// </summary>
/// <param name="directory">Path of the index directory.</param>
public static long LastModified(System.String directory)
{
    System.IO.FileInfo indexLocation = new System.IO.FileInfo(directory);
    return LastModified(indexLocation);
}
/// <summary> Returns the time the index in the named directory was last modified.
/// <p>Synchronization of IndexReader and IndexWriter instances is
/// no longer done via time stamps of the segments file since the time resolution
/// depends on the hardware platform. Instead, a version number is maintained
/// within the segments file, which is incremented every time the index is
/// changed.</p>
/// Deprecated: replaced by <c>GetCurrentVersion(FileInfo)</c>.
/// </summary>
/// <param name="directory">Location of the index directory.</param>
public static long LastModified(System.IO.FileInfo directory)
{
    long modified = FSDirectory.FileModified(directory, "segments");
    return modified;
}
/// <summary> Returns the time the index in the named directory was last modified.
/// <p>Synchronization of IndexReader and IndexWriter instances is
/// no longer done via time stamps of the segments file since the time resolution
/// depends on the hardware platform. Instead, a version number is maintained
/// within the segments file, which is incremented every time the index is
/// changed.</p>
/// Deprecated: replaced by <c>GetCurrentVersion(Directory)</c>.
/// </summary>
/// <param name="directory">Directory containing the index.</param>
public static long LastModified(Directory directory)
{
    long modified = directory.FileModified("segments");
    return modified;
}
/// <summary> Reads version number from segments files. The version number counts the
/// number of changes of the index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns> version number.</returns>
/// <throws> IOException if segments file cannot be read </throws>
public static long GetCurrentVersion(System.String directory)
{
    System.IO.FileInfo indexLocation = new System.IO.FileInfo(directory);
    return GetCurrentVersion(indexLocation);
}
/// <summary> Reads version number from segments files. The version number counts the
/// number of changes of the index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns> version number.</returns>
/// <throws> IOException if segments file cannot be read </throws>
public static long GetCurrentVersion(System.IO.FileInfo directory)
{
    Directory dir = FSDirectory.GetDirectory(directory, false);
    try
    {
        return GetCurrentVersion(dir);
    }
    finally
    {
        // The directory was obtained here, so hand it back even when the read fails.
        dir.Close();
    }
}
/// <summary> Reads version number from segments files. The version number counts the
/// number of changes of the index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns> version number.</returns>
/// <throws> IOException if segments file cannot be read. </throws>
public static long GetCurrentVersion(Directory directory)
{
    long version = SegmentInfos.ReadCurrentVersion(directory);
    return version;
}
/// <summary>Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized Field in the document.
/// Each vector contains terms and frequencies for all terms
/// in a given vectorized Field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <param name="docNumber">Number of the document.</param>
/// <seealso cref="Field.IsTermVectorStored()"/>
public abstract TermFreqVector[] GetTermFreqVectors(int docNumber);
/// <summary>Return a term frequency vector for the specified document and Field. The
/// vector returned contains terms and frequencies for those terms in
/// the specified Field of this document, if the Field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
/// <param name="docNumber">Number of the document.</param>
/// <param name="field">Name of the Field.</param>
/// <seealso cref="Field.IsTermVectorStored()"/>
public abstract TermFreqVector GetTermFreqVector(int docNumber, System.String field);
/// <summary> Returns <code>true</code> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <code>false</code> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns> <code>true</code> if an index exists; <code>false</code> otherwise</returns>
public static bool IndexExists(System.String directory)
{
    // Resolve the "segments" entry once; it counts whether it is a file or a directory.
    System.String segmentsPath = new System.IO.FileInfo(System.IO.Path.Combine(directory, "segments")).FullName;
    return System.IO.File.Exists(segmentsPath) || System.IO.Directory.Exists(segmentsPath);
}
/// <summary> Returns <code>true</code> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <code>false</code> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns> <code>true</code> if an index exists; <code>false</code> otherwise</returns>
public static bool IndexExists(System.IO.FileInfo directory)
{
    // Resolve the "segments" entry once; it counts whether it is a file or a directory.
    System.String segmentsPath = new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, "segments")).FullName;
    return System.IO.File.Exists(segmentsPath) || System.IO.Directory.Exists(segmentsPath);
}
/// <summary> Returns <code>true</code> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <code>false</code> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns> <code>true</code> if an index exists; <code>false</code> otherwise</returns>
/// <throws> IOException if there is a problem with accessing the index </throws>
public static bool IndexExists(Directory directory)
{
    bool hasSegmentsFile = directory.FileExists("segments");
    return hasSegmentsFile;
}
/// <summary>Returns the number of documents in this index.</summary>
/// <returns>The document count.</returns>
public abstract int NumDocs();
/// <summary>Returns one greater than the largest possible document number.
/// This may be used to, e.g., determine how big to allocate an array which
/// will have an element for every document number in an index.
/// </summary>
public abstract int MaxDoc();
/// <summary>Returns the stored fields of the <code>n</code><sup>th</sup>
/// <code>Document</code> in this index.
/// </summary>
/// <param name="n">Document number.</param>
public abstract Document Document(int n);
/// <summary>Returns true if document <i>n</i> has been deleted.</summary>
/// <param name="n">Document number.</param>
public abstract bool IsDeleted(int n);
/// <summary>Returns true if any documents have been deleted.</summary>
public abstract bool HasDeletions();
/// <summary>Returns the byte-encoded normalization factor for the named Field of
/// every document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">Name of the Field.</param>
/// <seealso cref="Field.SetBoost(float)"/>
public abstract byte[] Norms(System.String field);
/// <summary>Reads the byte-encoded normalization factor for the named Field of every
/// document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">Name of the Field.</param>
/// <param name="bytes">Array the factors are read into.</param>
/// <param name="offset">Offset argument for <paramref name="bytes"/>.</param>
/// <seealso cref="Field.SetBoost(float)"/>
public abstract void Norms(System.String field, byte[] bytes, int offset);
/// <summary>Expert: Resets the normalization factor for the named Field of the named
/// document. The norm represents the product of the Field's boost
/// (<c>Field.SetBoost(float)</c>) and its length normalization
/// (<c>Similarity.LengthNorm(String, int)</c>). Thus, to preserve the length
/// normalization values when resetting this, one should base the new value upon
/// the old.
/// </summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New byte-encoded norm.</param>
/// <throws> IOException if the write lock cannot be acquired or the reader is out of date </throws>
/// <seealso cref="Norms(String)"/>
/// <seealso cref="Similarity.DecodeNorm(byte)"/>
public void SetNorm(int doc, System.String field, byte value_Renamed)
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before
        // changing the index.
        if (directoryOwner)
        {
            AquireWriteLock();
        }
        DoSetNorm(doc, field, value_Renamed);
        hasChanges = true; // picked up by Commit()
    }
}
/// <summary>Implements setNorm in subclass.</summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New byte-encoded norm.</param>
protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);
/// <summary>Expert: Resets the normalization factor for the named Field of the named
/// document. The float value is encoded with <c>Similarity.EncodeNorm</c> and
/// passed to the byte overload.
/// </summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New norm, before encoding.</param>
/// <seealso cref="Norms(String)"/>
/// <seealso cref="Similarity.DecodeNorm(byte)"/>
public virtual void SetNorm(int doc, System.String field, float value_Renamed)
{
    byte encodedNorm = Similarity.EncodeNorm(value_Renamed);
    SetNorm(doc, field, encodedNorm);
}
/// <summary>Returns an enumeration of all the terms in the index.
/// The enumeration is ordered by Term.compareTo(). Each term
/// is greater than all that precede it in the enumeration.
/// </summary>
public abstract TermEnum Terms();
/// <summary>Returns an enumeration of all terms after a given term.
/// The enumeration is ordered by Term.compareTo(). Each term
/// is greater than all that precede it in the enumeration.
/// </summary>
/// <param name="t">Term after which the enumeration starts.</param>
public abstract TermEnum Terms(Term t);
/// <summary>Returns the number of documents containing the term <code>t</code>.</summary>
/// <param name="t">Term to count documents for.</param>
public abstract int DocFreq(Term t);
/// <summary>Returns an enumeration of all the documents which contain
/// <code>term</code>. For each document, the document number, the frequency of
/// the term in that document is also provided, for use in search scoring.
/// Thus, this method implements the mapping:
/// <p>Term =&gt; &lt;docNum, freq&gt;<sup>*</sup></p>
/// <p>The enumeration is ordered by document number. Each document number
/// is greater than all that precede it in the enumeration.</p>
/// </summary>
/// <param name="term">Term whose documents are enumerated.</param>
public virtual TermDocs TermDocs(Term term)
{
    // Same shape as TermPositions(Term): take the unpositioned enumerator,
    // position it on the term, and hand it to the caller.
    TermDocs termDocs = TermDocs();
    termDocs.Seek(term);
    return termDocs;
}
/// <summary>Returns an unpositioned <c>TermDocs</c> enumerator.</summary>
public abstract TermDocs TermDocs();
/// <summary>Returns an enumeration of all the documents which contain
/// <code>term</code>. For each document, in addition to the document number
/// and frequency of the term in that document, a list of all of the ordinal
/// positions of the term in the document is available. Thus, this method
/// implements the mapping:
/// <p>Term =&gt; &lt;docNum, freq,
/// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
/// pos<sub>freq-1</sub>&gt;
/// &gt;<sup>*</sup></p>
/// <p> This positional information facilitates phrase and proximity searching.</p>
/// <p>The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration.</p>
/// </summary>
/// <param name="term">Term whose positions are enumerated.</param>
public virtual TermPositions TermPositions(Term term)
{
    TermPositions positions = TermPositions();
    positions.Seek(term);
    return positions;
}
/// <summary>Returns an unpositioned <c>TermPositions</c> enumerator.</summary>
public abstract TermPositions TermPositions();
/// <summary> Tries to acquire the WriteLock on this directory.
/// This method is only valid if this IndexReader is directory owner.
/// </summary>
/// <throws> IOException If WriteLock cannot be acquired, or if the index has
/// changed since this reader was opened. </throws>
private void AquireWriteLock()
{
    if (this.writeLock != null)
    {
        return; // already holding the write lock
    }

    Lock candidate = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!candidate.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
    {
        // obtain write lock
        throw new System.IO.IOException("Index locked for write: " + candidate);
    }
    this.writeLock = candidate;

    // We have to check whether the index has changed since this reader was
    // opened. If so, this reader is no longer valid for deletion. The lock is
    // dropped again, so a later call repeats this check and fails the same way.
    if (SegmentInfos.ReadCurrentVersion(directory) > segmentInfos.GetVersion())
    {
        this.writeLock.Release();
        this.writeLock = null;
        throw new System.IO.IOException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
    }
}
/// <summary>Deletes the document numbered <code>docNum</code>. Once a document is
/// deleted it will not appear in TermDocs or TermPositions enumerations.
/// Attempts to read its Field with the <c>Document(int)</c>
/// method will result in an error. The presence of this document may still be
/// reflected in the <c>DocFreq</c> statistic, though
/// this will be corrected eventually as the index is further modified.
/// </summary>
/// <param name="docNum">Number of the document to delete.</param>
/// <throws> IOException if the write lock cannot be acquired or the reader is out of date </throws>
public void Delete(int docNum)
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before
        // changing the index.
        if (directoryOwner)
        {
            AquireWriteLock();
        }
        DoDelete(docNum);
        hasChanges = true; // picked up by Commit()
    }
}
/// <summary>Implements deletion of the document numbered <code>docNum</code>.
/// Applications should call <c>Delete(int)</c> or <c>Delete(Term)</c>.
/// </summary>
/// <param name="docNum">Number of the document to delete.</param>
protected internal abstract void DoDelete(int docNum);
/// <summary>Deletes all documents containing <code>term</code>.
/// This is useful if one uses a document Field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
/// term with the appropriate Field and the unique ID string as its text and
/// passes it to this method. Returns the number of documents deleted.
/// </summary>
/// <param name="term">Term identifying the documents to delete.</param>
/// <returns>Number of documents deleted.</returns>
public int Delete(Term term)
{
    TermDocs docs = TermDocs(term);
    if (docs == null)
    {
        return 0;
    }

    int deleted = 0;
    try
    {
        // NOTE(review): relies on TermDocs exposing Next(), Doc() and Close() -- confirm against the interface.
        while (docs.Next())
        {
            Delete(docs.Doc());
            deleted++;
        }
    }
    finally
    {
        docs.Close();
    }
    return deleted;
}
/// <summary>Undeletes all documents currently marked as deleted in this index.</summary>
/// <throws> IOException if the write lock cannot be acquired or the reader is out of date </throws>
public void UndeleteAll()
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before
        // changing the index.
        if (directoryOwner)
        {
            AquireWriteLock();
        }
        DoUndeleteAll();
        hasChanges = true; // picked up by Commit()
    }
}
/// <summary>Implements actual undeleteAll() in subclass.</summary>
protected internal abstract void DoUndeleteAll();
/// <summary> Commit changes resulting from delete, undeleteAll, or setNorm operations.
/// Does nothing when the <c>hasChanges</c> flag is not set.
/// </summary>
/// <throws> IOException </throws>
protected internal void Commit()
{
    lock (this)
    {
        if (!hasChanges)
        {
            return;
        }

        if (directoryOwner)
        {
            // in- & inter-process sync
            lock (directory)
            {
                Lock commitLock = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
                new AnonymousClassWith1(this, commitLock, IndexWriter.COMMIT_LOCK_TIMEOUT).run();
            }
            if (writeLock != null)
            {
                writeLock.Release(); // release write lock
                // Forget the released lock so the next write operation acquires a fresh one.
                writeLock = null;
            }
        }
        else
        {
            // A non-owning reader has no segment infos to rewrite and takes no locks.
            DoCommit();
        }
        hasChanges = false;
    }
}
/// <summary>Implements commit.</summary>
protected internal abstract void DoCommit();
616 /// <summary> Closes files associated with this index.
617 /// Also saves any new deletions to disk.
618 /// No other methods should be called after this has been called.
628 System
.GC
.SuppressFinalize(this);
/// <summary>Implements close.</summary>
protected internal abstract void DoClose();
635 /// <summary>Release the write lock, if needed. </summary>
638 if (writeLock
!= null)
640 writeLock
.Release(); // release write lock
/// <summary> Returns a list of all unique Field names that exist in the index pointed
/// to by this IndexReader.
/// </summary>
/// <returns> Collection of Strings indicating the names of the fields</returns>
/// <throws> IOException if there is a problem with accessing the index </throws>
public abstract System.Collections.ICollection GetFieldNames();
/// <summary> Returns a list of all unique Field names that exist in the index pointed
/// to by this IndexReader. The boolean argument specifies whether the fields
/// returned are indexed or not.
/// </summary>
/// <param name="indexed"><code>true</code> if only indexed fields should be returned;
/// <code>false</code> if only unindexed fields should be returned.</param>
/// <returns> Collection of Strings indicating the names of the fields</returns>
/// <throws> IOException if there is a problem with accessing the index </throws>
public abstract System.Collections.ICollection GetFieldNames(bool indexed);
/// <summary> Returns the names of indexed fields, filtered by whether they store
/// term vector info.
/// </summary>
/// <param name="storedTermVector">if true, returns only Indexed fields that have term vector info,
/// else only indexed fields without term vector info</param>
/// <returns> Collection of Strings indicating the names of the fields</returns>
public abstract System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector);
/// <summary> Returns <code>true</code> iff the index in the named directory is
/// currently locked.
/// </summary>
/// <param name="directory">the directory to check for a lock</param>
/// <throws> IOException if there is a problem with accessing the index </throws>
public static bool IsLocked(Directory directory)
{
    // The commit lock is only consulted when the write lock is free.
    if (directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).IsLocked())
    {
        return true;
    }
    return directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME).IsLocked();
}
/// <summary> Returns <code>true</code> iff the index in the named directory is
/// currently locked.
/// </summary>
/// <param name="directory">the directory to check for a lock</param>
/// <throws> IOException if there is a problem with accessing the index </throws>
public static bool IsLocked(System.String directory)
{
    Directory dir = FSDirectory.GetDirectory(directory, false);
    try
    {
        return IsLocked(dir);
    }
    finally
    {
        // The directory was obtained here, so hand it back even when the check fails.
        dir.Close();
    }
}
/// <summary> Forcibly unlocks the index in the named directory.
/// <p>Caution: this should only be used by failure recovery code,
/// when it is known that no other process nor thread is in fact
/// currently accessing this index.</p>
/// </summary>
/// <param name="directory">Directory whose write and commit locks are released.</param>
public static void Unlock(Directory directory)
{
    Lock writeLockToClear = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    writeLockToClear.Release();

    Lock commitLockToClear = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
    commitLockToClear.Release();
}