Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Index / IndexReader.cs
blob3f62456e4271571311de111250e6464314fd1cd8
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Document = Lucene.Net.Documents.Document;
18 using Field = Lucene.Net.Documents.Field;
19 using Similarity = Lucene.Net.Search.Similarity;
20 using Directory = Lucene.Net.Store.Directory;
21 using FSDirectory = Lucene.Net.Store.FSDirectory;
22 using Lock = Lucene.Net.Store.Lock;
23 namespace Lucene.Net.Index
26 /// <summary>IndexReader is an abstract class, providing an interface for accessing an
27 /// index. Search of an index is done entirely through this abstract interface,
28 /// so that any subclass which implements it is searchable.
29 /// <p> Concrete subclasses of IndexReader are usually constructed with a call to
30 /// the static method {@link #open}.
31 /// <p> For efficiency, in this API documents are often referred to via
32 /// <i>document numbers</i>, non-negative integers which each name a unique
33 /// document in the index. These document numbers are ephemeral--they may change
34 /// as documents are added to and deleted from an index. Clients should thus not
35 /// rely on a given document having the same number between sessions.
36 /// </summary>
37 /// <author> Doug Cutting
38 /// </author>
39 /// <version> $Id: IndexReader.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $
40 /// </version>
41 public abstract class IndexReader
/// <summary>Lock-guarded action that reads the segment list of a directory and
/// builds the reader matching it. Created by the private <c>Open(Directory, bool)</c>.
/// </summary>
private class AnonymousClassWith : Lock.With
{
    private Lucene.Net.Store.Directory directory;
    private bool closeDirectory;

    /// <param name="directory">Directory whose segment infos are read.</param>
    /// <param name="closeDirectory">Passed through unchanged to the reader that gets created.</param>
    /// <param name="Param1">Lock forwarded to the <c>Lock.With</c> base constructor.</param>
    /// <param name="Param2">Second argument forwarded to the <c>Lock.With</c> base constructor.</param>
    internal AnonymousClassWith(Lucene.Net.Store.Directory directory, bool closeDirectory, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
    {
        this.directory = directory;
        this.closeDirectory = closeDirectory;
    }

    /// <summary>Reads the segment infos and returns a <c>SegmentReader</c> when exactly one
    /// segment exists, otherwise a <c>MultiReader</c> over one <c>SegmentReader</c> per segment.
    /// </summary>
    public override System.Object DoBody()
    {
        SegmentInfos segments = new SegmentInfos();
        segments.Read(directory);

        int segmentCount = segments.Count;
        if (segmentCount != 1)
        {
            IndexReader[] subReaders = new IndexReader[segmentCount];
            for (int slot = 0; slot < segmentCount; slot++)
            {
                subReaders[slot] = new SegmentReader(segments.Info(slot));
            }
            return new MultiReader(directory, segments, closeDirectory, subReaders);
        }

        // index is optimized
        return new SegmentReader(segments, segments.Info(0), closeDirectory);
    }
}
/// <summary>Lock-guarded action that commits the enclosing reader and then writes
/// its segment infos back to its directory. Created by <c>Commit()</c>.
/// </summary>
private class AnonymousClassWith1 : Lock.With
{
    private IndexReader enclosingInstance;

    /// <summary>The reader this action commits.</summary>
    public IndexReader Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <param name="enclosingInstance">Reader whose changes are committed by <c>DoBody()</c>.</param>
    /// <param name="Param1">Lock forwarded to the <c>Lock.With</c> base constructor.</param>
    /// <param name="Param2">Second argument forwarded to the <c>Lock.With</c> base constructor.</param>
    internal AnonymousClassWith1(IndexReader enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>Calls <c>DoCommit()</c> on the reader, then writes its segment infos. Always returns null.</summary>
    public override System.Object DoBody()
    {
        IndexReader owner = Enclosing_Instance;
        owner.DoCommit();
        owner.segmentInfos.Write(owner.directory);
        return null;
    }
}
/// <summary>Constructor used if IndexReader is not owner of its directory.
/// This is used for IndexReaders that are used within other IndexReaders that take care of locking directories.
/// </summary>
/// <param name="directory">Directory where IndexReader files reside.</param>
protected internal IndexReader(Directory directory)
{
    this.directory = directory;

    // Not the owner: no segment infos, and Close() leaves the directory open.
    directoryOwner = false;
    closeDirectory = false;
    segmentInfos = null;

    // Fresh reader: nothing pending and no write lock held.
    hasChanges = false;
    stale = false;
    writeLock = null;
}
/// <summary>Constructor used if IndexReader is owner of its directory.
/// If IndexReader is owner of its directory, it locks its directory in case of write operations.
/// </summary>
/// <param name="directory">Directory where IndexReader files reside.</param>
/// <param name="segmentInfos">Segment infos of the index; their version is compared with the
/// current on-disk version when the write lock is acquired, and they are written back on commit.</param>
/// <param name="closeDirectory">Whether <c>Close()</c> should also close <paramref name="directory"/>.</param>
internal IndexReader(Directory directory, SegmentInfos segmentInfos, bool closeDirectory)
{
    this.directory = directory;
    this.segmentInfos = segmentInfos;
    this.closeDirectory = closeDirectory;

    // Owner readers take the write lock before modifying the index.
    directoryOwner = true;

    // Fresh reader: nothing pending and no write lock held.
    hasChanges = false;
    stale = false;
    writeLock = null;
}
// Directory this reader works on; returned by Directory().
private Directory directory;
// Segment infos of the index; null for readers that do not own their directory.
private SegmentInfos segmentInfos;

// True when this reader owns its directory and therefore takes the write lock before changes.
private bool directoryOwner;
// True when Close() must also close the directory.
private bool closeDirectory;

// Write lock obtained by AquireWriteLock(); null while no lock is held.
private Lock writeLock;
// Set once the on-disk index version is found to be newer than the version in segmentInfos.
private bool stale;
// True after a delete, undeleteAll or setNorm until Commit() has run.
private bool hasChanges;
/// <summary>Opens a reader on the index stored in the FSDirectory at the given path.
/// The directory is closed again when the returned reader is closed.
/// </summary>
/// <param name="path">Path of the index directory.</param>
/// <returns>A reader for that index.</returns>
public static IndexReader Open(System.String path)
{
    Directory fsDirectory = FSDirectory.GetDirectory(path, false);
    return Open(fsDirectory, true);
}
/// <summary>Opens a reader on the index stored in the FSDirectory at the given location.
/// The directory is closed again when the returned reader is closed.
/// </summary>
/// <param name="path">Location of the index directory.</param>
/// <returns>A reader for that index.</returns>
public static IndexReader Open(System.IO.FileInfo path)
{
    Directory fsDirectory = FSDirectory.GetDirectory(path, false);
    return Open(fsDirectory, true);
}
/// <summary>Opens a reader on the index in the given Directory.
/// Closing the returned reader does not close <paramref name="directory"/>.
/// </summary>
/// <param name="directory">Directory holding the index.</param>
/// <returns>A reader for that index.</returns>
public static IndexReader Open(Directory directory)
{
    const bool closeDirectoryWithReader = false;
    return Open(directory, closeDirectoryWithReader);
}
/// <summary>Shared implementation of the public <c>Open</c> overloads: builds the reader
/// inside the commit lock of <paramref name="directory"/>.
/// </summary>
/// <param name="directory">Directory holding the index.</param>
/// <param name="closeDirectory">Whether closing the reader should also close the directory.</param>
private static IndexReader Open(Directory directory, bool closeDirectory)
{
    lock (directory)
    {
        // in- & inter-process sync
        Lock commitLock = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
        AnonymousClassWith openAction = new AnonymousClassWith(directory, closeDirectory, commitLock, IndexWriter.COMMIT_LOCK_TIMEOUT);
        return (IndexReader) openAction.run();
    }
}
/// <summary>Gets the directory this index resides in.</summary>
/// <returns>The directory passed to this reader's constructor.</returns>
public virtual Directory Directory()
{
    return this.directory;
}
/// <summary>Returns the time the index in the named directory was last modified.
/// <p>IndexReader and IndexWriter instances are no longer synchronized through the
/// time stamp of the segments file, because the time resolution depends on the
/// hardware platform. A version number kept inside the segments file is used
/// instead; it is incremented every time the index changes.</p>
/// </summary>
/// <remarks>Deprecated: replaced by <see cref="GetCurrentVersion(System.String)"/>.</remarks>
/// <param name="directory">Path of the index directory.</param>
public static long LastModified(System.String directory)
{
    System.IO.FileInfo indexLocation = new System.IO.FileInfo(directory);
    return LastModified(indexLocation);
}
/// <summary>Returns the time the index in the given directory was last modified,
/// taken from its "segments" file.
/// <p>IndexReader and IndexWriter instances are no longer synchronized through the
/// time stamp of the segments file, because the time resolution depends on the
/// hardware platform. A version number kept inside the segments file is used
/// instead; it is incremented every time the index changes.</p>
/// </summary>
/// <remarks>Deprecated: replaced by <see cref="GetCurrentVersion(System.IO.FileInfo)"/>.</remarks>
/// <param name="directory">Location of the index directory.</param>
public static long LastModified(System.IO.FileInfo directory)
{
    long modified = FSDirectory.FileModified(directory, "segments");
    return modified;
}
/// <summary>Returns the time the index in the given Directory was last modified,
/// taken from its "segments" file.
/// <p>IndexReader and IndexWriter instances are no longer synchronized through the
/// time stamp of the segments file, because the time resolution depends on the
/// hardware platform. A version number kept inside the segments file is used
/// instead; it is incremented every time the index changes.</p>
/// </summary>
/// <remarks>Deprecated: replaced by <see cref="GetCurrentVersion(Directory)"/>.</remarks>
/// <param name="directory">Directory holding the index.</param>
public static long LastModified(Directory directory)
{
    long modified = directory.FileModified("segments");
    return modified;
}
/// <summary>Reads the version number from the segments file. The version number
/// counts the number of changes of the index.
/// </summary>
/// <param name="directory">Path where the index resides.</param>
/// <returns>The version number.</returns>
/// <exception cref="System.IO.IOException">If the segments file cannot be read.</exception>
public static long GetCurrentVersion(System.String directory)
{
    System.IO.FileInfo indexLocation = new System.IO.FileInfo(directory);
    return GetCurrentVersion(indexLocation);
}
/// <summary>Reads the version number from the segments file. The version number
/// counts the number of changes of the index. The directory obtained for the read
/// is closed again before returning, including when the read fails.
/// </summary>
/// <param name="directory">Location where the index resides.</param>
/// <returns>The version number.</returns>
/// <exception cref="System.IO.IOException">If the segments file cannot be read.</exception>
public static long GetCurrentVersion(System.IO.FileInfo directory)
{
    Directory dir = FSDirectory.GetDirectory(directory, false);
    try
    {
        return GetCurrentVersion(dir);
    }
    finally
    {
        // Close on the failure path too, so an unreadable segments file does not leave the directory open.
        dir.Close();
    }
}
/// <summary>Reads the version number from the segments file. The version number
/// counts the number of changes of the index.
/// </summary>
/// <param name="directory">Directory where the index resides.</param>
/// <returns>The version number.</returns>
/// <exception cref="System.IO.IOException">If the segments file cannot be read.</exception>
public static long GetCurrentVersion(Directory directory)
{
    long version = SegmentInfos.ReadCurrentVersion(directory);
    return version;
}
/// <summary>Returns the term frequency vectors of the specified document, one per
/// vectorized Field. Each vector holds the terms of that Field and their frequencies.
/// </summary>
/// <param name="docNumber">Number of the document.</param>
/// <returns>The vectors, or null if the document has no vectorized fields.</returns>
/// <seealso cref="Field.IsTermVectorStored()"/>
public abstract TermFreqVector[] GetTermFreqVectors(int docNumber);
/// <summary>Returns the term frequency vector of one Field of the specified document.
/// The vector holds the terms of that Field and their frequencies.
/// </summary>
/// <param name="docNumber">Number of the document.</param>
/// <param name="field">Name of the Field.</param>
/// <returns>The vector, or null if the Field did not have its storeTermVector flag set.</returns>
/// <seealso cref="Field.IsTermVectorStored()"/>
public abstract TermFreqVector GetTermFreqVector(int docNumber, System.String field);
/// <summary>Returns <code>true</code> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <code>false</code> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns><code>true</code> if an index exists; <code>false</code> otherwise</returns>
public static bool IndexExists(System.String directory)
{
    // Resolve the "segments" entry once; it counts whether it is a file or a directory.
    System.String segmentsPath = new System.IO.FileInfo(System.IO.Path.Combine(directory, "segments")).FullName;
    return System.IO.File.Exists(segmentsPath) || System.IO.Directory.Exists(segmentsPath);
}
/// <summary>Returns <code>true</code> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <code>false</code> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns><code>true</code> if an index exists; <code>false</code> otherwise</returns>
public static bool IndexExists(System.IO.FileInfo directory)
{
    // Resolve the "segments" entry once; it counts whether it is a file or a directory.
    System.String segmentsPath = new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, "segments")).FullName;
    return System.IO.File.Exists(segmentsPath) || System.IO.Directory.Exists(segmentsPath);
}
/// <summary>Returns <code>true</code> if an index exists in the specified directory,
/// that is, if the directory contains a "segments" file.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns><code>true</code> if an index exists; <code>false</code> otherwise</returns>
/// <exception cref="System.IO.IOException">If there is a problem with accessing the index.</exception>
public static bool IndexExists(Directory directory)
{
    bool hasSegmentsFile = directory.FileExists("segments");
    return hasSegmentsFile;
}
/// <summary>Gets the number of documents in this index.</summary>
/// <returns>The document count.</returns>
public abstract int NumDocs();
/// <summary>Gets one more than the largest possible document number. Useful, for
/// example, for sizing an array that needs one element per document number in the index.
/// </summary>
/// <returns>The largest possible document number plus one.</returns>
public abstract int MaxDoc();
/// <summary>Gets the stored fields of the <code>n</code><sup>th</sup>
/// <code>Document</code> in this index.
/// </summary>
/// <param name="n">Document number.</param>
/// <returns>The document holding the stored fields.</returns>
public abstract Document Document(int n);
/// <summary>Tells whether document <i>n</i> has been deleted.</summary>
/// <param name="n">Document number.</param>
/// <returns>true if the document has been deleted.</returns>
public abstract bool IsDeleted(int n);
/// <summary>Tells whether any documents have been deleted.</summary>
/// <returns>true if at least one document has been deleted.</returns>
public abstract bool HasDeletions();
/// <summary>Gets the byte-encoded normalization factor of the named Field for
/// every document. The search code uses these values to score documents.
/// </summary>
/// <param name="field">Name of the Field.</param>
/// <seealso cref="Field.SetBoost(float)"/>
public abstract byte[] Norms(System.String field);
/// <summary>Reads the byte-encoded normalization factor of the named Field for
/// every document. The search code uses these values to score documents.
/// </summary>
/// <param name="field">Name of the Field.</param>
/// <param name="bytes">Presumably the array the norms are read into - confirm against implementations.</param>
/// <param name="offset">Presumably the start position within <paramref name="bytes"/> - confirm against implementations.</param>
/// <seealso cref="Field.SetBoost(float)"/>
public abstract void Norms(System.String field, byte[] bytes, int offset);
/// <summary>Expert: resets the normalization factor for the named Field of the named
/// document. The norm represents the product of the Field's
/// <see cref="Field.SetBoost(float)">boost</see> and its length normalization
/// (see <c>Similarity.LengthNorm</c>). To preserve the length normalization values
/// when resetting this, base the new value upon the old one.
/// </summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New byte-encoded norm.</param>
/// <seealso cref="Norms(System.String)"/>
/// <seealso cref="Similarity.DecodeNorm(byte)"/>
public void SetNorm(int doc, System.String field, byte value_Renamed)
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before changing anything.
        if (directoryOwner)
        {
            AquireWriteLock();
        }

        DoSetNorm(doc, field, value_Renamed);
        hasChanges = true;
    }
}
/// <summary>Subclass hook that performs the actual work of <c>SetNorm</c>.</summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New byte-encoded norm.</param>
protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);
/// <summary>Expert: resets the normalization factor for the named Field of the named
/// document. The value is encoded with <c>Similarity.EncodeNorm</c> and passed on to
/// the byte overload.
/// </summary>
/// <param name="doc">Document number.</param>
/// <param name="field">Name of the Field.</param>
/// <param name="value_Renamed">New norm, not yet encoded.</param>
/// <seealso cref="Norms(System.String)"/>
/// <seealso cref="Similarity.DecodeNorm(byte)"/>
public virtual void SetNorm(int doc, System.String field, float value_Renamed)
{
    byte encodedNorm = Similarity.EncodeNorm(value_Renamed);
    SetNorm(doc, field, encodedNorm);
}
/// <summary>Gets an enumeration of all the terms in the index, ordered by
/// Term.compareTo(): each term is greater than all that precede it.
/// </summary>
public abstract TermEnum Terms();
/// <summary>Gets an enumeration of all terms after a given term, ordered by
/// Term.compareTo(): each term is greater than all that precede it.
/// </summary>
/// <param name="t">Term after which the enumeration starts.</param>
public abstract TermEnum Terms(Term t);
/// <summary>Gets the number of documents containing the term <code>t</code>.</summary>
/// <param name="t">Term to count documents for.</param>
public abstract int DocFreq(Term t);
/// <summary>Gets an enumeration of all the documents which contain
/// <code>term</code>. For each document, the document number and the frequency of
/// the term in that document are provided, for use in search scoring.
/// Thus, this method implements the mapping:
/// <p><ul>
/// Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
/// </ul></p>
/// <p>The enumeration is ordered by document number. Each document number
/// is greater than all that precede it in the enumeration.</p>
/// </summary>
/// <param name="term">Term whose documents are enumerated.</param>
public virtual TermDocs TermDocs(Term term)
{
    // Start from an unpositioned enumerator and seek it to the requested term.
    TermDocs enumerator = TermDocs();
    enumerator.Seek(term);
    return enumerator;
}
/// <summary>Gets an unpositioned <c>TermDocs</c> enumerator.</summary>
public abstract TermDocs TermDocs();
/// <summary>Gets an enumeration of all the documents which contain
/// <code>term</code>. For each document, in addition to the document number
/// and frequency of the term in that document, a list of all of the ordinal
/// positions of the term in the document is available. Thus, this method
/// implements the mapping:
/// <p><ul>
/// Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
/// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
/// pos<sub>freq-1</sub>&gt;
/// &gt;<sup>*</sup>
/// </ul></p>
/// <p>This positional information facilitates phrase and proximity searching.</p>
/// <p>The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration.</p>
/// </summary>
/// <param name="term">Term whose documents and positions are enumerated.</param>
public virtual TermPositions TermPositions(Term term)
{
    // Start from an unpositioned enumerator and seek it to the requested term.
    TermPositions enumerator = TermPositions();
    enumerator.Seek(term);
    return enumerator;
}
/// <summary>Gets an unpositioned <c>TermPositions</c> enumerator.</summary>
public abstract TermPositions TermPositions();
/// <summary>Tries to acquire the write lock on this directory.
/// This method is only valid if this IndexReader is directory owner.
/// Does nothing when the lock is already held.
/// </summary>
/// <exception cref="System.IO.IOException">If the write lock cannot be obtained, or if the
/// reader is stale (the index version on disk is newer than the one this reader was opened with).</exception>
private void AquireWriteLock()
{
    if (stale)
    {
        throw new System.IO.IOException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
    }

    if (this.writeLock != null)
    {
        // Lock already held from an earlier modification.
        return;
    }

    // obtain write lock
    Lock candidate = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!candidate.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
    {
        throw new System.IO.IOException("Index locked for write: " + candidate);
    }
    this.writeLock = candidate;

    // we have to check whether index has changed since this reader was opened.
    // if so, this reader is no longer valid for deletion
    bool indexChanged = SegmentInfos.ReadCurrentVersion(directory) > segmentInfos.GetVersion();
    if (indexChanged)
    {
        stale = true;
        this.writeLock.Release();
        this.writeLock = null;
        throw new System.IO.IOException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
    }
}
/// <summary>Deletes the document numbered <code>docNum</code>. Once a document is
/// deleted it will not appear in TermDocs or TermPositions enumerations, and
/// attempts to read its fields with <c>Document(int)</c> will result in an error.
/// The document may still be reflected in the <c>DocFreq</c> statistic, though
/// this will be corrected eventually as the index is further modified.
/// </summary>
/// <param name="docNum">Number of the document to delete.</param>
public void Delete(int docNum)
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before changing anything.
        if (directoryOwner)
        {
            AquireWriteLock();
        }

        DoDelete(docNum);
        hasChanges = true;
    }
}
/// <summary>Subclass hook that deletes the document numbered <code>docNum</code>.
/// Applications should call <c>Delete(int)</c> or <c>Delete(Term)</c> instead.
/// </summary>
/// <param name="docNum">Number of the document to delete.</param>
protected internal abstract void DoDelete(int docNum);
/// <summary>Deletes all documents containing <code>term</code>.
/// This is useful if a document Field holds a unique ID string for the
/// document: to delete such a document, construct a term with that Field and
/// the ID string as its text and pass it to this method.
/// </summary>
/// <param name="term">Term identifying the documents to delete.</param>
/// <returns>The number of documents deleted.</returns>
public int Delete(Term term)
{
    TermDocs matches = TermDocs(term);
    if (matches == null)
    {
        return 0;
    }

    int deletedCount = 0;
    try
    {
        while (matches.Next())
        {
            Delete(matches.Doc());
            deletedCount++;
        }
    }
    finally
    {
        // Close the enumerator even when a deletion throws.
        matches.Close();
    }
    return deletedCount;
}
/// <summary>Undeletes all documents currently marked as deleted in this index.</summary>
public void UndeleteAll()
{
    lock (this)
    {
        // A reader that owns its directory must hold the write lock before changing anything.
        if (directoryOwner)
        {
            AquireWriteLock();
        }

        DoUndeleteAll();
        hasChanges = true;
    }
}
/// <summary>Subclass hook that performs the actual work of <c>UndeleteAll()</c>.</summary>
protected internal abstract void DoUndeleteAll();
/// <summary>Commits changes resulting from delete, undeleteAll, or setNorm operations.
/// A reader that owns its directory commits inside the commit lock, writes its
/// segment infos and then releases the write lock; other readers only call
/// <c>DoCommit()</c>. Does no commit work when there are no pending changes.
/// </summary>
/// <exception cref="System.IO.IOException">Declared by the original documentation; the conditions are not visible here.</exception>
protected internal void Commit()
{
    lock (this)
    {
        if (hasChanges)
        {
            if (!directoryOwner)
            {
                DoCommit();
            }
            else
            {
                lock (directory)
                {
                    // in- & inter-process sync
                    Lock commitLock = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
                    new AnonymousClassWith1(this, commitLock, IndexWriter.COMMIT_LOCK_TIMEOUT).run();
                }

                if (writeLock != null)
                {
                    writeLock.Release(); // release write lock
                    writeLock = null;
                }
            }
        }
        hasChanges = false;
    }
}
/// <summary>Subclass hook that performs the actual commit.</summary>
protected internal abstract void DoCommit();
/// <summary>Closes files associated with this index.
/// Pending changes are committed first, so new deletions are saved to disk.
/// No other methods should be called after this has been called.
/// </summary>
public void Close()
{
    lock (this)
    {
        Commit();
        DoClose();

        if (closeDirectory)
        {
            directory.Close();
        }

        // Skip the finalizer for this instance once it has been closed.
        System.GC.SuppressFinalize(this);
    }
}
/// <summary>Subclass hook that performs the actual close.</summary>
protected internal abstract void DoClose();
/// <summary>Releases the write lock, if one is still held.</summary>
~IndexReader()
{
    if (writeLock == null)
    {
        return;
    }

    writeLock.Release(); // release write lock
    writeLock = null;
}
/// <summary>Gets all unique Field names that exist in the index pointed to by
/// this IndexReader.
/// </summary>
/// <returns>Collection of Strings indicating the names of the fields.</returns>
/// <exception cref="System.IO.IOException">If there is a problem with accessing the index.</exception>
public abstract System.Collections.ICollection GetFieldNames();
/// <summary>Gets all unique Field names that exist in the index pointed to by
/// this IndexReader, restricted to either indexed or unindexed fields.
/// </summary>
/// <param name="indexed"><code>true</code> if only indexed fields should be returned;
/// <code>false</code> if only unindexed fields should be returned.</param>
/// <returns>Collection of Strings indicating the names of the fields.</returns>
/// <exception cref="System.IO.IOException">If there is a problem with accessing the index.</exception>
public abstract System.Collections.ICollection GetFieldNames(bool indexed);
/// <summary>Gets the names of indexed fields, selected by whether they have term vector info.</summary>
/// <param name="storedTermVector">if true, returns only indexed fields that have term vector info,
/// else only indexed fields without term vector info.</param>
/// <returns>Collection of Strings indicating the names of the fields.</returns>
public abstract System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector);
/// <summary>Returns <code>true</code> iff the index in the given directory is
/// currently locked, i.e. its write lock or its commit lock is held.
/// </summary>
/// <param name="directory">the directory to check for a lock</param>
/// <exception cref="System.IO.IOException">If there is a problem with accessing the index.</exception>
public static bool IsLocked(Directory directory)
{
    if (directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).IsLocked())
    {
        return true;
    }
    // The commit lock is only consulted when the write lock is free.
    return directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME).IsLocked();
}
/// <summary>Returns <code>true</code> iff the index in the named directory is
/// currently locked. The directory obtained for the check is closed again before
/// returning, including when the check fails.
/// </summary>
/// <param name="directory">the directory to check for a lock</param>
/// <exception cref="System.IO.IOException">If there is a problem with accessing the index.</exception>
public static bool IsLocked(System.String directory)
{
    Directory dir = FSDirectory.GetDirectory(directory, false);
    try
    {
        return IsLocked(dir);
    }
    finally
    {
        // Close on the failure path too, so a failed lock check does not leave the directory open.
        dir.Close();
    }
}
/// <summary>Forcibly unlocks the index in the given directory by releasing both
/// its write lock and its commit lock.
/// <p>Caution: this should only be used by failure recovery code,
/// when it is known that no other process nor thread is in fact
/// currently accessing this index.</p>
/// </summary>
/// <param name="directory">Directory holding the index to unlock.</param>
public static void Unlock(Directory directory)
{
    Lock forWrite = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    forWrite.Release();

    Lock forCommit = directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME);
    forCommit.Release();
}