2 * Copyright 2005 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 using Analyzer
= Lucene
.Net
.Analysis
.Analyzer
;
19 using Document
= Lucene
.Net
.Documents
.Document
;
20 using Directory
= Lucene
.Net
.Store
.Directory
;
21 using FSDirectory
= Lucene
.Net
.Store
.FSDirectory
;
23 namespace Lucene
.Net
.Index
26 /// <summary> A class to modify an index, i.e. to delete and add documents. This
27 /// class hides {@link IndexReader} and {@link IndexWriter} so that you
28 /// do not need to care about implementation details such as that adding
29 /// documents is done via IndexWriter and deletion is done via IndexReader.
31 /// <p>Note that you cannot create more than one <code>IndexModifier</code> object
32 /// on the same directory at the same time.
36 /// <!-- ======================================================== -->
37 /// <!-- = Java Sourcecode to HTML automatically converted code = -->
38 /// <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
39 /// <!-- = Further information: http://www.java2html.de = -->
40 /// <div align="left" class="java">
41 /// <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
43 /// <!-- start source code -->
44 /// <td nowrap="nowrap" valign="top" align="left">
46 /// <font color="#ffffff"> </font><font color="#000000">Analyzer analyzer = </font><font color="#7f0055"><b>new </b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
47 /// <font color="#ffffff"> </font><font color="#3f7f5f">// create an index in /tmp/index, overwriting an existing one:</font><br/>
48 /// <font color="#ffffff"> </font><font color="#000000">IndexModifier indexModifier = </font><font color="#7f0055"><b>new </b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">"/tmp/index"</font><font color="#000000">, analyzer, </font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
49 /// <font color="#ffffff"> </font><font color="#000000">Document doc = </font><font color="#7f0055"><b>new </b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
50 /// <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">, Field.Store.YES, Field.Index.UN_TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
51 /// <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"body"</font><font color="#000000">, </font><font color="#2a00ff">"a simple test"</font><font color="#000000">, Field.Store.YES, Field.Index.TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
52 /// <font color="#ffffff"> </font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
53 /// <font color="#ffffff"> </font><font color="#7f0055"><b>int </b></font><font color="#000000">deleted = indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">))</font><font color="#000000">;</font><br/>
54 /// <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">"Deleted " </font><font color="#000000">+ deleted + </font><font color="#2a00ff">" document"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
55 /// <font color="#ffffff"> </font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
56 /// <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">() </font><font color="#000000">+ </font><font color="#2a00ff">" docs in index"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
57 /// <font color="#ffffff"> </font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
59 /// <!-- end source code -->
63 /// <!-- = END of automatically generated HTML code = -->
64 /// <!-- ======================================================== -->
66 /// <p>Not all methods of IndexReader and IndexWriter are offered by this
67 /// class. If you need access to additional methods, either use those classes
68 /// directly or implement your own class that extends <code>IndexModifier</code>.
70 /// <p>Although an instance of this class can be used from more than one
71 /// thread, you will not get the best performance. You might want to use
72 /// IndexReader and IndexWriter directly for that (but you will need to
73 /// care about synchronization yourself then).
75 /// <p>While you can freely mix calls to add() and delete() using this class,
76 /// you should batch your calls for best performance. For example, if you
77 /// want to update 20 documents, you should first delete all those documents,
78 /// then add all the new documents.
81 /// <author> Daniel Naber
83 public class IndexModifier
/// <summary>
/// Seeds the three writer-tuning fields (maxBufferedDocs, maxFieldLength,
/// mergeFactor) with the corresponding IndexWriter default constants.
/// </summary>
/// <remarks>
/// Restored to compilable form: the scraped listing had fused line numbers
/// into each statement, split tokens across lines and dropped the braces.
/// </remarks>
private void InitBlock()
{
    maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
    maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
    mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
}
/// <summary>Writer handle; assigned in Init and CreateIndexWriter, null otherwise.</summary>
protected internal IndexWriter indexWriter = null;

/// <summary>Reader handle; assigned in CreateIndexReader, null otherwise.</summary>
protected internal IndexReader indexReader = null;

/// <summary>Index location; stored by Init and also used there as the lock object.</summary>
protected internal Directory directory = null;

/// <summary>Analyzer stored by Init and handed to every IndexWriter this class creates.</summary>
protected internal Analyzer analyzer = null;

/// <summary>Open/closed flag; starts out false.</summary>
// NOTE(review): no visible statement in this extract assigns `open`; confirm where it is toggled.
protected internal bool open = false;

// Settings remembered here so CreateIndexWriter can re-apply them to each
// IndexWriter it constructs.

/// <summary>Stream passed to IndexWriter.SetInfoStream; null by default.</summary>
protected internal System.IO.StreamWriter infoStream = null;

/// <summary>Value passed to IndexWriter.SetUseCompoundFile; true by default.</summary>
protected internal bool useCompoundFile = true;

/// <summary>Value passed to IndexWriter.SetMaxBufferedDocs; initial value set by InitBlock.</summary>
protected internal int maxBufferedDocs;

/// <summary>Value passed to IndexWriter.SetMaxFieldLength; initial value set by InitBlock.</summary>
protected internal int maxFieldLength;

/// <summary>Value passed to IndexWriter.SetMergeFactor; initial value set by InitBlock.</summary>
protected internal int mergeFactor;
106 /// <summary> Open an index with write access by forwarding the arguments to Init.</summary>
109 /// <param name="directory">the index directory</param>
111 /// <param name="analyzer">the analyzer to use for adding new documents</param>
113 /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
114 /// <code>false</code> to append to the existing index</param>
// NOTE(review): the embedded listing numbers jump from 116 to 119, so the lines
// between the signature and the Init call did not survive extraction. One of them
// is presumably a statement (possibly the InitBlock() call) - confirm upstream.
116 public IndexModifier(Directory directory
, Analyzer analyzer
, bool create
)
119 Init(directory
, analyzer
, create
);
122 /// <summary> Open an index with write access. The path is resolved through
/// FSDirectory.GetDirectory(dirName, create) and the result is passed to Init.</summary>
125 /// <param name="dirName">the index directory</param>
127 /// <param name="analyzer">the analyzer to use for adding new documents</param>
129 /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
130 /// <code>false</code> to append to the existing index</param>
// NOTE(review): the embedded listing numbers jump from 132 to 135, so the lines
// between the signature and the first visible statement are missing from this
// extract (possibly including an InitBlock() call) - confirm upstream.
132 public IndexModifier(System
.String dirName
, Analyzer analyzer
, bool create
)
// The same `create` flag is given to both the directory lookup and Init.
135 Directory dir
= FSDirectory
.GetDirectory(dirName
, create
);
136 Init(dir
, analyzer
, create
);
139 /// <summary> Open an index with write access. The FileInfo is resolved through
/// FSDirectory.GetDirectory(file, create) and the result is passed to Init.</summary>
142 /// <param name="file">the index directory</param>
144 /// <param name="analyzer">the analyzer to use for adding new documents</param>
146 /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
147 /// <code>false</code> to append to the existing index</param>
// NOTE(review): the embedded listing numbers jump from 149 to 152, so the lines
// between the signature and the first visible statement are missing from this
// extract (possibly including an InitBlock() call) - confirm upstream.
149 public IndexModifier(System
.IO
.FileInfo file
, Analyzer analyzer
, bool create
)
// The same `create` flag is given to both the directory lookup and Init.
152 Directory dir
= FSDirectory
.GetDirectory(file
, create
);
153 Init(dir
, analyzer
, create
);
156 /// <summary> Initialize an IndexWriter: stores the directory and analyzer on this
/// instance and constructs the writer from (directory, analyzer, create).</summary>
/// <param name="directory">index location; stored and also used as the lock object</param>
/// <param name="analyzer">analyzer stored on the instance and given to the writer</param>
/// <param name="create">forwarded unchanged to the IndexWriter constructor</param>
157 /// <throws> IOException </throws>
// NOTE(review): braces are missing from this extract, so the extent of the lock
// block cannot be read here. Listing lines after 164 are also absent; nothing
// visible sets the `open` field - presumably that happens in a lost line; confirm.
158 protected internal virtual void Init(Directory directory
, Analyzer analyzer
, bool create
)
160 this.directory
= directory
;
// The directory object that was just stored is the one being locked on.
161 lock (this.directory
)
163 this.analyzer
= analyzer
;
164 indexWriter
= new IndexWriter(directory
, analyzer
, create
);
169 /// <summary> Guard for a closed index: raises System.SystemException with the
/// message "Index is closed". (Earlier doc text named IllegalStateException; the
/// visible throw statement uses System.SystemException.)</summary>
170 /// <throws> System.SystemException </throws>
// NOTE(review): listing lines 172-174 are absent from this extract, so the
// condition guarding the throw is not visible - presumably a test of the `open`
// field; confirm against the upstream file.
171 protected internal virtual void AssureOpen()
175 throw new System
.SystemException("Index is closed");
179 /// <summary> Close the IndexReader and open an IndexWriter. When no writer is held,
/// a new IndexWriter is built over the stored directory and analyzer with
/// create = false, then the remembered settings are applied to it.</summary>
180 /// <throws> IOException </throws>
// NOTE(review): listing lines 186-189 are absent, so what happens when
// indexReader is non-null is not visible here. The summary says the reader is
// closed; presumably it is also reset to null - confirm. Braces are missing too,
// so the nesting below is inferred from the summary, not from the text.
181 protected internal virtual void CreateIndexWriter()
183 if (indexWriter
== null)
185 if (indexReader
!= null)
// `false`: the writer is opened with the create flag off.
190 indexWriter
= new IndexWriter(directory
, analyzer
, false);
// Re-apply the settings remembered in this instance's fields to the new writer.
191 indexWriter
.SetInfoStream(infoStream
);
192 indexWriter
.SetUseCompoundFile(useCompoundFile
);
193 indexWriter
.SetMaxBufferedDocs(maxBufferedDocs
);
194 indexWriter
.SetMaxFieldLength(maxFieldLength
);
195 indexWriter
.SetMergeFactor(mergeFactor
);
199 /// <summary> Close the IndexWriter and open an IndexReader. When no reader is held,
/// one is obtained from IndexReader.Open(directory).</summary>
200 /// <throws> IOException </throws>
// NOTE(review): listing lines 206-209 are absent, so what happens when
// indexWriter is non-null is not visible here. The summary says the writer is
// closed; presumably it is also reset to null - confirm against upstream.
201 protected internal virtual void CreateIndexReader()
203 if (indexReader
== null)
205 if (indexWriter
!= null)
210 indexReader
= IndexReader
.Open(directory
);
214 /// <summary> Make sure all changes are written to disk.</summary>
215 /// <throws> IOException </throws>
// NOTE(review): only the null test on indexWriter survived extraction. Listing
// lines 217-220 and 222-235 are absent, so how the flush is carried out for the
// writer and for the reader cannot be read from this extract - consult upstream
// before relying on or changing this method.
216 public virtual void Flush()
221 if (indexWriter
!= null)
236 /// <summary> Adds a document to this index, using the provided analyzer instead of the
237 /// one specified in the constructor. If the document contains more than
238 /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
/// [end of sentence lost in extraction - presumably "discarded"].</summary>
/// <param name="doc">the document handed to indexWriter.AddDocument</param>
/// <param name="docAnalyzer">analyzer for this document; when null the
/// single-argument IndexWriter.AddDocument(doc) overload is used</param>
241 /// <seealso cref="IndexWriter.AddDocument(Document, Analyzer)"/>
243 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 245-249 and 252 are absent. Line 252 sits between
// the two AddDocument calls and is presumably the `else`; read literally, the
// text below would add the document twice when docAnalyzer is non-null. The
// missing preamble presumably locks, checks the open state and ensures a writer
// exists - confirm against upstream.
244 public virtual void AddDocument(Document doc
, Analyzer docAnalyzer
)
250 if (docAnalyzer
!= null)
251 indexWriter
.AddDocument(doc
, docAnalyzer
);
253 indexWriter
.AddDocument(doc
);
/// <summary>
/// Adds a document to this index by delegating to
/// <c>AddDocument(doc, null)</c>, i.e. without a per-document analyzer.
/// The rest of the original summary sentence about
/// {@link #SetMaxFieldLength(int)} was truncated in the scraped listing.
/// </summary>
/// <remarks>
/// Restored to compilable form: the scraped listing had fused line numbers
/// into each statement, split tokens across lines and dropped the braces.
/// </remarks>
/// <param name="doc">the document to add</param>
/// <seealso cref="IndexWriter.AddDocument(Document)"/>
/// <throws> IllegalStateException if the index is closed </throws>
public virtual void AddDocument(Document doc)
{
    // A null analyzer selects the default-analyzer path of the two-argument overload.
    AddDocument(doc, null);
}
269 /// <summary> Deletes all documents containing <code>term</code>.
270 /// This is useful if one uses a document field to hold a unique ID string for
271 /// the document. Then to delete such a document, one merely constructs a
272 /// term with the appropriate field and the unique ID string as its text and
273 /// passes it to this method. Returns the number of documents deleted.</summary>
/// <param name="term">term forwarded to indexReader.DeleteDocuments</param>
275 /// <returns> the number of documents deleted, as reported by indexReader.DeleteDocuments</returns>
277 /// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
279 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 281-285 are absent, so the statements ahead of the
// return are not visible - presumably locking, the open-state check and
// CreateIndexReader(); confirm against upstream.
280 public virtual int DeleteDocuments(Term term
)
286 return indexReader
.DeleteDocuments(term
);
/// <summary>
/// Deprecated alias: forwards to <c>DeleteDocuments(term)</c> and returns its result.
/// Deletes all documents containing <code>term</code>. This is useful if one
/// uses a document field to hold a unique ID string for the document: to
/// delete such a document, construct a term with the appropriate field and
/// the unique ID string as its text and pass it to this method.
/// </summary>
/// <remarks>
/// Restored to compilable form: the scraped listing had fused line numbers
/// into each statement, split tokens across lines and dropped the braces.
/// </remarks>
/// <param name="term">term identifying the documents to delete</param>
/// <returns> the number of documents deleted </returns>
/// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
/// <throws> IllegalStateException if the index is closed </throws>
/// <deprecated> Use {@link #DeleteDocuments(Term)} instead. </deprecated>
public virtual int Delete(Term term)
{
    return DeleteDocuments(term);
}
308 /// <summary> Deletes the document numbered <code>docNum</code>.</summary>
/// <param name="docNum">document number forwarded to indexReader.DeleteDocument</param>
309 /// <seealso cref="IndexReader.DeleteDocument(int)"/>
311 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 313-317 are absent, so the statements ahead of the
// reader call are not visible - presumably locking, the open-state check and
// CreateIndexReader(); confirm against upstream.
312 public virtual void DeleteDocument(int docNum
)
318 indexReader
.DeleteDocument(docNum
);
/// <summary>
/// Deprecated alias: forwards to <c>DeleteDocument(docNum)</c>, which deletes
/// the document numbered <code>docNum</code>.
/// </summary>
/// <remarks>
/// Restored to compilable form: the scraped listing had fused line numbers
/// into each statement, split tokens across lines and dropped the braces.
/// </remarks>
/// <param name="docNum">number of the document to delete</param>
/// <seealso cref="IndexReader.DeleteDocument(int)"/>
/// <throws> IllegalStateException if the index is closed </throws>
/// <deprecated> Use {@link #DeleteDocument(int)} instead. </deprecated>
public virtual void Delete(int docNum)
{
    DeleteDocument(docNum);
}
333 /// <summary> Returns the number of documents currently in this index.</summary>
/// <returns> indexWriter.DocCount() when a writer is held; the other visible
/// return yields indexReader.NumDocs()</returns>
334 /// <seealso cref="IndexWriter.DocCount()"/>
336 /// <seealso cref="IndexReader.NumDocs()"/>
338 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 340-343 and 347-349 are absent. The second return
// is presumably the `else` branch of the writer test, and the lost preamble
// presumably locks and checks the open state - confirm against upstream.
339 public virtual int DocCount()
344 if (indexWriter
!= null)
346 return indexWriter
.DocCount();
350 return indexReader
.NumDocs();
355 /// <summary> Merges all segments together into a single segment, optimizing an index
/// [end of sentence lost in extraction]. The visible body delegates to
/// indexWriter.Optimize().</summary>
358 /// <seealso cref="IndexWriter.Optimize()"/>
360 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 362-366 are absent, so the statements ahead of the
// writer call are not visible - presumably locking, the open-state check and
// CreateIndexWriter(); confirm against upstream.
361 public virtual void Optimize()
367 indexWriter
.Optimize();
371 /// <summary> If non-null, information about merges and a message when
372 /// {@link #GetMaxFieldLength()} is reached will be printed to this.
373 /// <p>Example: <tt>index.setInfoStream(System.err);</tt></p></summary>
/// <param name="infoStream">stream pushed to the current writer (if any) and
/// remembered in the infoStream field</param>
375 /// <seealso cref="IndexWriter.SetInfoStream(PrintStream)"/>
377 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 379-382 are absent; presumably they lock and check
// the open state - confirm. Braces are missing, so the extent of the `if` is not
// visible; the field assignment is presumably unconditional.
378 public virtual void SetInfoStream(System
.IO
.StreamWriter infoStream
)
383 if (indexWriter
!= null)
385 indexWriter
.SetInfoStream(infoStream
);
// Remembered so CreateIndexWriter can apply it to writers it builds.
387 this.infoStream
= infoStream
;
/// <summary> Returns the value reported by indexWriter.GetInfoStream().</summary>
391 /// <throws> IOException </throws>
392 /// <seealso cref="IndexModifier.SetInfoStream(PrintStream)"/>
// NOTE(review): listing lines 395-399 are absent; presumably they lock, check
// the open state and ensure a writer exists before indexWriter is dereferenced -
// confirm. The declared return type is System.IO.TextWriter, whereas
// SetInfoStream and the infoStream field use System.IO.StreamWriter.
394 public virtual System
.IO
.TextWriter
GetInfoStream()
400 return indexWriter
.GetInfoStream();
404 /// <summary> Setting to turn on usage of a compound file. When on, multiple files
405 /// for each segment are merged into a single file once the segment creation
406 /// is finished. This is done regardless of what directory is in use.</summary>
/// <param name="useCompoundFile">value pushed to the current writer (if any)
/// and remembered in the useCompoundFile field</param>
408 /// <seealso cref="IndexWriter.SetUseCompoundFile(boolean)"/>
410 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 412-415 are absent; presumably they lock and check
// the open state - confirm. Braces are missing, so the extent of the `if` is not
// visible; the field assignment is presumably unconditional.
411 public virtual void SetUseCompoundFile(bool useCompoundFile
)
416 if (indexWriter
!= null)
418 indexWriter
.SetUseCompoundFile(useCompoundFile
);
// Remembered so CreateIndexWriter can apply it to writers it builds.
420 this.useCompoundFile
= useCompoundFile
;
/// <summary> Returns the value reported by indexWriter.GetUseCompoundFile().</summary>
424 /// <throws> IOException </throws>
425 /// <seealso cref="IndexModifier.SetUseCompoundFile(boolean)"/>
// NOTE(review): listing lines 428-432 are absent; presumably they lock, check
// the open state and ensure a writer exists before indexWriter is dereferenced -
// confirm against upstream.
427 public virtual bool GetUseCompoundFile()
433 return indexWriter
.GetUseCompoundFile();
437 /// <summary> The maximum number of terms that will be indexed for a single field in a
438 /// document. This limits the amount of memory required for indexing, so that
439 /// collections with very large files will not crash the indexing process by
440 /// running out of memory.<p/>
441 /// Note that this effectively truncates large documents, excluding from the
442 /// index terms that occur further in the document. If you know your source
443 /// documents are large, be sure to set this value high enough to accommodate
444 /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
445 /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
446 /// By default, no more than 10,000 terms will be indexed for a field.</summary>
/// <param name="maxFieldLength">value pushed to the current writer (if any)
/// and remembered in the maxFieldLength field</param>
448 /// <seealso cref="IndexWriter.SetMaxFieldLength(int)"/>
450 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 452-455 are absent; presumably they lock and check
// the open state - confirm. Braces are missing, so the extent of the `if` is not
// visible; the field assignment is presumably unconditional.
451 public virtual void SetMaxFieldLength(int maxFieldLength
)
456 if (indexWriter
!= null)
458 indexWriter
.SetMaxFieldLength(maxFieldLength
);
// Remembered so CreateIndexWriter can apply it to writers it builds.
460 this.maxFieldLength
= maxFieldLength
;
/// <summary> Returns the value reported by indexWriter.GetMaxFieldLength().</summary>
464 /// <throws> IOException </throws>
465 /// <seealso cref="IndexModifier.SetMaxFieldLength(int)"/>
// NOTE(review): listing lines 468-472 are absent; presumably they lock, check
// the open state and ensure a writer exists before indexWriter is dereferenced -
// confirm against upstream.
467 public virtual int GetMaxFieldLength()
473 return indexWriter
.GetMaxFieldLength();
// NOTE(review): the summary below is word-for-word the SetMaxFieldLength text. It
// describes a per-field term limit, which does not match this method's name or
// the IndexWriter.SetMaxBufferedDocs call it makes. Presumably a copy-paste slip;
// replace it with the description from IndexWriter.SetMaxBufferedDocs.
477 /// <summary> The maximum number of terms that will be indexed for a single field in a
478 /// document. This limits the amount of memory required for indexing, so that
479 /// collections with very large files will not crash the indexing process by
480 /// running out of memory.<p/>
481 /// Note that this effectively truncates large documents, excluding from the
482 /// index terms that occur further in the document. If you know your source
483 /// documents are large, be sure to set this value high enough to accommodate
484 /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
485 /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
486 /// By default, no more than 10,000 terms will be indexed for a field.</summary>
/// <param name="maxBufferedDocs">value pushed to the current writer (if any)
/// and remembered in the maxBufferedDocs field</param>
488 /// <seealso cref="IndexWriter.SetMaxBufferedDocs(int)"/>
490 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 492-495 are absent; presumably they lock and check
// the open state - confirm. Braces are missing, so the extent of the `if` is not
// visible; the field assignment is presumably unconditional.
491 public virtual void SetMaxBufferedDocs(int maxBufferedDocs
)
496 if (indexWriter
!= null)
498 indexWriter
.SetMaxBufferedDocs(maxBufferedDocs
);
// Remembered so CreateIndexWriter can apply it to writers it builds.
500 this.maxBufferedDocs
= maxBufferedDocs
;
/// <summary> Returns the value reported by indexWriter.GetMaxBufferedDocs().</summary>
504 /// <throws> IOException </throws>
505 /// <seealso cref="IndexModifier.SetMaxBufferedDocs(int)"/>
// NOTE(review): listing lines 508-512 are absent; presumably they lock, check
// the open state and ensure a writer exists before indexWriter is dereferenced -
// confirm against upstream.
507 public virtual int GetMaxBufferedDocs()
513 return indexWriter
.GetMaxBufferedDocs();
517 /// <summary> Determines how often segment indices are merged by addDocument(). With
518 /// smaller values, less RAM is used while indexing, and searches on
519 /// unoptimized indices are faster, but indexing speed is slower. With larger
520 /// values, more RAM is used during indexing, and while searches on unoptimized
521 /// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
522 /// for batch index creation, and smaller values (&lt; 10) for indices that are
523 /// interactively maintained.
524 /// <p>This must never be less than 2. The default value is 10.</p></summary>
/// <param name="mergeFactor">value pushed to the current writer (if any) and
/// remembered in the mergeFactor field</param>
527 /// <seealso cref="IndexWriter.SetMergeFactor(int)"/>
529 /// <throws> IllegalStateException if the index is closed </throws>
// NOTE(review): listing lines 531-534 are absent; presumably they lock and check
// the open state - confirm. Braces are missing, so the extent of the `if` is not
// visible; the field assignment is presumably unconditional.
530 public virtual void SetMergeFactor(int mergeFactor
)
535 if (indexWriter
!= null)
537 indexWriter
.SetMergeFactor(mergeFactor
);
// Remembered so CreateIndexWriter can apply it to writers it builds.
539 this.mergeFactor
= mergeFactor
;
/// <summary> Returns the value reported by indexWriter.GetMergeFactor().</summary>
543 /// <throws> IOException </throws>
544 /// <seealso cref="IndexModifier.SetMergeFactor(int)"/>
// NOTE(review): listing lines 547-551 are absent; presumably they lock, check
// the open state and ensure a writer exists before indexWriter is dereferenced -
// confirm against upstream.
546 public virtual int GetMergeFactor()
552 return indexWriter
.GetMergeFactor();
556 /// <summary> Close this index, writing all pending changes to disk.</summary>
559 /// <throws> IllegalStateException if the index has been closed before already
/// (the visible throw uses System.SystemException with the message
/// "Index is closed already") </throws>
// NOTE(review): listing lines 561-564 and everything after 566 are absent. The
// condition guarding the throw (presumably a test of the `open` field) and the
// statements that close the writer or reader and update state are not visible -
// consult upstream before relying on or changing this method.
560 public virtual void Close()
565 throw new System
.SystemException("Index is closed already");
566 if (indexWriter
!= null)
/// <summary>
/// Describes this modifier as the literal prefix "Index@" followed by the
/// string form of the <c>directory</c> field.
/// </summary>
/// <remarks>
/// Restored to compilable form: the scraped listing had fused line numbers
/// into each statement, split tokens across lines and dropped the braces.
/// </remarks>
/// <returns> "Index@" concatenated with <c>directory</c> </returns>
public override System.String ToString()
{
    return "Index@" + directory;
}
586 // used as an example in the javadoc:
587 public static void main(String[] args) throws IOException {
588 Analyzer analyzer = new StandardAnalyzer();
589 // create an index in /tmp/index, overwriting an existing one:
590 IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
591 Document doc = new Document();
592 doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
593 doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
594 indexModifier.addDocument(doc);
595 int deleted = indexModifier.delete(new Term("id", "1"));
596 System.out.println("Deleted " + deleted + " document");
597 indexModifier.flush();
598 System.out.println(indexModifier.docCount() + " docs in index");
599 indexModifier.close();