cvsimport
[beagle.git] / beagled / Lucene.Net / Index / IndexModifier.cs
blob27ba62afede4eedc5b4eac3b792a016439666927
1 /*
2 * Copyright 2005 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Analyzer = Lucene.Net.Analysis.Analyzer;
19 using Document = Lucene.Net.Documents.Document;
20 using Directory = Lucene.Net.Store.Directory;
21 using FSDirectory = Lucene.Net.Store.FSDirectory;
23 namespace Lucene.Net.Index
26 /// <summary> A class to modify an index, i.e. to delete and add documents. This
27 /// class hides <see cref="IndexReader"/> and <see cref="IndexWriter"/> so that you
28 /// do not need to care about implementation details such as that adding
29 /// documents is done via IndexWriter and deletion is done via IndexReader.
30 ///
31 /// <p>Note that you cannot create more than one <code>IndexModifier</code> object
32 /// on the same directory at the same time.
33 ///
34 /// <p>Example usage:
35 ///
36 /// <!-- ======================================================== -->
37 /// <!-- = Java Sourcecode to HTML automatically converted code = -->
38 /// <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
39 /// <!-- = Further information: http://www.java2html.de = -->
40 /// <div align="left" class="java">
41 /// <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
42 /// <tr>
43 /// <!-- start source code -->
44 /// <td nowrap="nowrap" valign="top" align="left">
45 /// <code>
46 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">Analyzer&nbsp;analyzer&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
47 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#3f7f5f">//&nbsp;create&nbsp;an&nbsp;index&nbsp;in&nbsp;/tmp/index,&nbsp;overwriting&nbsp;an&nbsp;existing&nbsp;one:</font><br/>
48 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">IndexModifier&nbsp;indexModifier&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">&#34;/tmp/index&#34;</font><font color="#000000">,&nbsp;analyzer,&nbsp;</font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
49 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">Document&nbsp;doc&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
50 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">&#34;id&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;1&#34;</font><font color="#000000">,&nbsp;Field.Store.YES,&nbsp;Field.Index.UN_TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
51 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">&#34;body&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;a&nbsp;simple&nbsp;test&#34;</font><font color="#000000">,&nbsp;Field.Store.YES,&nbsp;Field.Index.TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
52 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
53 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#7f0055"><b>int&nbsp;</b></font><font color="#000000">deleted&nbsp;=&nbsp;indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">&#34;id&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;1&#34;</font><font color="#000000">))</font><font color="#000000">;</font><br/>
54 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">&#34;Deleted&nbsp;&#34;&nbsp;</font><font color="#000000">+&nbsp;deleted&nbsp;+&nbsp;</font><font color="#2a00ff">&#34;&nbsp;document&#34;</font><font color="#000000">)</font><font color="#000000">;</font><br/>
55 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
56 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">()&nbsp;</font><font color="#000000">+&nbsp;</font><font color="#2a00ff">&#34;&nbsp;docs&nbsp;in&nbsp;index&#34;</font><font color="#000000">)</font><font color="#000000">;</font><br/>
57 /// <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
58 /// </td>
59 /// <!-- end source code -->
60 /// </tr>
61 /// </table>
62 /// </div>
63 /// <!-- = END of automatically generated HTML code = -->
64 /// <!-- ======================================================== -->
65 ///
66 /// <p>Not all methods of IndexReader and IndexWriter are offered by this
67 /// class. If you need access to additional methods, either use those classes
68 /// directly or implement your own class that extends <code>IndexModifier</code>.
69 ///
70 /// <p>Although an instance of this class can be used from more than one
71 /// thread, you will not get the best performance. You might want to use
72 /// IndexReader and IndexWriter directly for that (but you will need to
73 /// care about synchronization yourself then).
74 ///
75 /// <p>While you can freely mix calls to add() and delete() using this class,
76 /// you should batch your calls for best performance. For example, if you
77 /// want to update 20 documents, you should first delete all those documents,
78 /// then add all the new documents.
79 ///
80 /// </summary>
81 /// <author> Daniel Naber
82 /// </author>
83 public class IndexModifier
/// <summary>
/// Seeds the cached writer settings (merge factor, maximum field length and
/// maximum buffered documents) with the default constants exposed by
/// <see cref="IndexWriter"/>. The constructors in this file call it before
/// <c>Init</c>.
/// </summary>
private void InitBlock()
{
    this.mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
    this.maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
    this.maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
}
// Writer used for additions; CreateIndexReader closes it and sets it to null.
protected internal IndexWriter indexWriter;

// Reader used for deletions; CreateIndexWriter closes it and sets it to null.
protected internal IndexReader indexReader;

// Index location; also the object every public operation locks on.
protected internal Directory directory;

// Analyzer handed to each IndexWriter this class creates.
protected internal Analyzer analyzer;

// Set to true by Init and back to false by Close; checked by AssureOpen.
protected internal bool open;

// Cached writer settings. The setters store them here and CreateIndexWriter
// re-applies them to every writer it opens.
protected internal System.IO.StreamWriter infoStream;
protected internal bool useCompoundFile = true;
protected internal int maxBufferedDocs;
protected internal int maxFieldLength;
protected internal int mergeFactor;
/// <summary>
/// Opens the index held by <paramref name="directory"/> with write access.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
public IndexModifier(Directory directory, Analyzer analyzer, bool create)
{
    // Load the default writer settings first, then open the initial writer.
    this.InitBlock();
    this.Init(directory, analyzer, create);
}
/// <summary>
/// Opens the index located at the path <paramref name="dirName"/> with write access.
/// The path is resolved to a directory through <c>FSDirectory.GetDirectory</c>.
/// </summary>
/// <param name="dirName">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
public IndexModifier(System.String dirName, Analyzer analyzer, bool create)
{
    InitBlock();
    Init(FSDirectory.GetDirectory(dirName, create), analyzer, create);
}
/// <summary>
/// Opens the index located at <paramref name="file"/> with write access.
/// The location is resolved to a directory through <c>FSDirectory.GetDirectory</c>.
/// </summary>
/// <param name="file">the index directory</param>
/// <param name="analyzer">the analyzer to use for adding new documents</param>
/// <param name="create"><c>true</c> to create the index or overwrite the existing one;
/// <c>false</c> to append to the existing index</param>
public IndexModifier(System.IO.FileInfo file, Analyzer analyzer, bool create)
{
    InitBlock();
    Init(FSDirectory.GetDirectory(file, create), analyzer, create);
}
/// <summary>
/// Stores the directory and analyzer, opens the initial <see cref="IndexWriter"/>
/// and marks this modifier as open. The writer is created while holding a lock
/// on the directory object.
/// </summary>
/// <param name="directory">the index directory; also used as the lock object</param>
/// <param name="analyzer">the analyzer passed to the writer</param>
/// <param name="create">forwarded to the <see cref="IndexWriter"/> constructor</param>
/// <exception cref="System.IO.IOException">may be raised while opening the writer
/// (as stated by the original documentation)</exception>
protected internal virtual void Init(Directory directory, Analyzer analyzer, bool create)
{
    this.directory = directory;
    lock (directory)
    {
        this.analyzer = analyzer;
        this.indexWriter = new IndexWriter(directory, analyzer, create);
        this.open = true;
    }
}
/// <summary>
/// Verifies that this modifier has not been closed, i.e. that the <c>open</c>
/// flag is still set.
/// </summary>
/// <exception cref="System.InvalidOperationException">if the index is closed</exception>
protected internal virtual void AssureOpen()
{
    if (!open)
    {
        // InvalidOperationException is the .NET counterpart of Java's
        // IllegalStateException. It derives from SystemException, so callers
        // that catch SystemException continue to work.
        throw new System.InvalidOperationException("Index is closed");
    }
}
/// <summary>
/// Switches to writer mode. Does nothing when a writer already exists; otherwise
/// closes the current reader (if any), opens a new <see cref="IndexWriter"/> with
/// <c>create = false</c> and re-applies the cached settings to it.
/// </summary>
/// <exception cref="System.IO.IOException">may be raised by the underlying
/// reader/writer (as stated by the original documentation)</exception>
protected internal virtual void CreateIndexWriter()
{
    if (indexWriter != null)
    {
        return; // already in writer mode
    }

    if (indexReader != null)
    {
        indexReader.Close();
        indexReader = null;
    }

    IndexWriter freshWriter = new IndexWriter(directory, analyzer, false);
    indexWriter = freshWriter;

    // Re-apply the settings remembered by the Set* methods.
    freshWriter.SetInfoStream(infoStream);
    freshWriter.SetUseCompoundFile(useCompoundFile);
    freshWriter.SetMaxBufferedDocs(maxBufferedDocs);
    freshWriter.SetMaxFieldLength(maxFieldLength);
    freshWriter.SetMergeFactor(mergeFactor);
}
/// <summary>
/// Switches to reader mode. Does nothing when a reader already exists; otherwise
/// closes the current writer (if any) and opens an <see cref="IndexReader"/> on
/// the directory.
/// </summary>
/// <exception cref="System.IO.IOException">may be raised by the underlying
/// reader/writer (as stated by the original documentation)</exception>
protected internal virtual void CreateIndexReader()
{
    if (indexReader != null)
    {
        return; // already in reader mode
    }

    if (indexWriter != null)
    {
        indexWriter.Close();
        indexWriter = null;
    }

    indexReader = IndexReader.Open(directory);
}
/// <summary>
/// Makes sure all changes are written to disk by closing whichever of the
/// writer or reader is currently open and immediately re-opening one of the
/// same kind. Runs under a lock on the directory object.
/// </summary>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised by the underlying
/// reader/writer (as stated by the original documentation)</exception>
public virtual void Flush()
{
    lock (directory)
    {
        AssureOpen();

        if (indexWriter == null)
        {
            // Reader mode: cycle the reader.
            indexReader.Close();
            indexReader = null;
            CreateIndexReader();
            return;
        }

        // Writer mode: cycle the writer.
        indexWriter.Close();
        indexWriter = null;
        CreateIndexWriter();
    }
}
/// <summary>
/// Adds a document to this index, using the provided analyzer instead of the
/// one specified in the constructor. When <paramref name="docAnalyzer"/> is
/// <c>null</c> the writer's own analyzer is used. If the document contains more
/// than <see cref="SetMaxFieldLength(int)"/> terms for a given field, the
/// remainder are discarded.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <param name="doc">the document to add</param>
/// <param name="docAnalyzer">analyzer for this document, or <c>null</c></param>
/// <seealso cref="IndexWriter.AddDocument(Document, Analyzer)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void AddDocument(Document doc, Analyzer docAnalyzer)
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();

        if (docAnalyzer == null)
        {
            indexWriter.AddDocument(doc);
        }
        else
        {
            indexWriter.AddDocument(doc, docAnalyzer);
        }
    }
}
/// <summary>
/// Adds a document to this index by delegating to
/// <see cref="AddDocument(Document, Analyzer)"/> with a <c>null</c> analyzer.
/// If the document contains more than <see cref="SetMaxFieldLength(int)"/>
/// terms for a given field, the remainder are discarded.
/// </summary>
/// <param name="doc">the document to add</param>
/// <seealso cref="IndexWriter.AddDocument(Document)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void AddDocument(Document doc)
{
    Analyzer noOverride = null;
    AddDocument(doc, noOverride);
}
/// <summary>
/// Deletes all documents containing <paramref name="term"/>.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document: to delete such a document, construct a term with the
/// appropriate field and the unique ID string as its text and pass it here.
/// </summary>
/// <remarks>Switches to reader mode first, closing an open writer if necessary.</remarks>
/// <param name="term">the term identifying the documents to delete</param>
/// <returns>the number of documents deleted</returns>
/// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual int DeleteDocuments(Term term)
{
    int deletedCount;
    lock (directory)
    {
        AssureOpen();
        CreateIndexReader();
        deletedCount = indexReader.DeleteDocuments(term);
    }
    return deletedCount;
}
/// <summary>
/// Deprecated alias: use <see cref="DeleteDocuments(Term)"/> instead.
/// Deletes all documents containing <paramref name="term"/> by forwarding to
/// that method.
/// </summary>
/// <param name="term">the term identifying the documents to delete</param>
/// <returns>the number of documents deleted</returns>
/// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual int Delete(Term term)
{
    int deletedCount = DeleteDocuments(term);
    return deletedCount;
}
/// <summary>Deletes the document numbered <paramref name="docNum"/>.</summary>
/// <remarks>Switches to reader mode first, closing an open writer if necessary.</remarks>
/// <param name="docNum">number of the document to delete</param>
/// <seealso cref="IndexReader.DeleteDocument(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void DeleteDocument(int docNum)
{
    lock (this.directory)
    {
        this.AssureOpen();
        this.CreateIndexReader();
        this.indexReader.DeleteDocument(docNum);
    }
}
/// <summary>
/// Deprecated alias: use <see cref="DeleteDocument(int)"/> instead.
/// Deletes the document numbered <paramref name="docNum"/> by forwarding to
/// that method.
/// </summary>
/// <param name="docNum">number of the document to delete</param>
/// <seealso cref="IndexReader.DeleteDocument(int)"/>
/// <exception cref="System.SystemException">if the index is closed</exception>
public virtual void Delete(int docNum)
{
    this.DeleteDocument(docNum);
}
/// <summary>
/// Returns the number of documents currently in this index. The count comes
/// from the writer when one is open, otherwise from the reader; no mode switch
/// is performed.
/// </summary>
/// <returns>the document count reported by the active writer or reader</returns>
/// <seealso cref="IndexWriter.DocCount()"/>
/// <seealso cref="IndexReader.NumDocs()"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual int DocCount()
{
    lock (directory)
    {
        AssureOpen();
        return indexWriter != null
            ? indexWriter.DocCount()
            : indexReader.NumDocs();
    }
}
/// <summary>
/// Merges all segments together into a single segment, optimizing the index
/// for search.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <seealso cref="IndexWriter.Optimize()"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void Optimize()
{
    lock (this.directory)
    {
        this.AssureOpen();
        this.CreateIndexWriter();
        this.indexWriter.Optimize();
    }
}
/// <summary>
/// If non-null, information about merges and a message when
/// <see cref="GetMaxFieldLength()"/> is reached will be printed to this stream.
/// The value is applied to the current writer (if one is open) and remembered
/// for writers created later.
/// </summary>
/// <param name="infoStream">the stream to log to, or <c>null</c></param>
/// <seealso cref="IndexWriter.SetInfoStream"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
{
    lock (directory)
    {
        AssureOpen();

        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetInfoStream(infoStream);
        }

        // Cache the value so CreateIndexWriter can re-apply it.
        this.infoStream = infoStream;
    }
}
/// <summary>
/// Returns the info stream reported by the index writer.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <returns>the value of <c>IndexWriter.GetInfoStream()</c></returns>
/// <seealso cref="SetInfoStream"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised while switching to the
/// writer (as stated by the original documentation)</exception>
public virtual System.IO.TextWriter GetInfoStream()
{
    System.IO.TextWriter currentStream;
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        currentStream = indexWriter.GetInfoStream();
    }
    return currentStream;
}
/// <summary>
/// Setting to turn on usage of a compound file. When on, multiple files for
/// each segment are merged into a single file once the segment creation is
/// finished. This is done regardless of what directory is in use. The value is
/// applied to the current writer (if one is open) and remembered for writers
/// created later.
/// </summary>
/// <param name="useCompoundFile"><c>true</c> to use compound files</param>
/// <seealso cref="IndexWriter.SetUseCompoundFile(bool)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void SetUseCompoundFile(bool useCompoundFile)
{
    lock (directory)
    {
        AssureOpen();

        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetUseCompoundFile(useCompoundFile);
        }

        // Cache the value so CreateIndexWriter can re-apply it.
        this.useCompoundFile = useCompoundFile;
    }
}
/// <summary>
/// Returns the compound-file setting reported by the index writer.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <returns>the value of <c>IndexWriter.GetUseCompoundFile()</c></returns>
/// <seealso cref="SetUseCompoundFile(bool)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised while switching to the
/// writer (as stated by the original documentation)</exception>
public virtual bool GetUseCompoundFile()
{
    bool currentValue;
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        currentValue = indexWriter.GetUseCompoundFile();
    }
    return currentValue;
}
/// <summary>
/// The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.
/// <para>Note that this effectively truncates large documents, excluding from
/// the index terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to <c>int.MaxValue</c>, then the only limit
/// is your memory, but you should anticipate an out-of-memory error.</para>
/// <para>By default, no more than 10,000 terms will be indexed for a field.</para>
/// <para>The value is applied to the current writer (if one is open) and
/// remembered for writers created later.</para>
/// </summary>
/// <param name="maxFieldLength">the new per-field term limit</param>
/// <seealso cref="IndexWriter.SetMaxFieldLength(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void SetMaxFieldLength(int maxFieldLength)
{
    lock (directory)
    {
        AssureOpen();

        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMaxFieldLength(maxFieldLength);
        }

        // Cache the value so CreateIndexWriter can re-apply it.
        this.maxFieldLength = maxFieldLength;
    }
}
/// <summary>
/// Returns the maximum field length reported by the index writer.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <returns>the value of <c>IndexWriter.GetMaxFieldLength()</c></returns>
/// <seealso cref="SetMaxFieldLength(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised while switching to the
/// writer (as stated by the original documentation)</exception>
public virtual int GetMaxFieldLength()
{
    int currentValue;
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        currentValue = indexWriter.GetMaxFieldLength();
    }
    return currentValue;
}
/// <summary>
/// Sets the "max buffered docs" setting of the underlying index writer. The
/// value is forwarded to the current writer (if one is open) and remembered so
/// that it is re-applied to writers created later. See
/// <c>IndexWriter.SetMaxBufferedDocs(int)</c> for the exact meaning of the value.
/// </summary>
/// <param name="maxBufferedDocs">the new setting</param>
/// <seealso cref="IndexWriter.SetMaxBufferedDocs(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
{
    lock (directory)
    {
        AssureOpen();

        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMaxBufferedDocs(maxBufferedDocs);
        }

        // Cache the value so CreateIndexWriter can re-apply it.
        this.maxBufferedDocs = maxBufferedDocs;
    }
}
/// <summary>
/// Returns the "max buffered docs" setting reported by the index writer.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <returns>the value of <c>IndexWriter.GetMaxBufferedDocs()</c></returns>
/// <seealso cref="SetMaxBufferedDocs(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised while switching to the
/// writer (as stated by the original documentation)</exception>
public virtual int GetMaxBufferedDocs()
{
    int currentValue;
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        currentValue = indexWriter.GetMaxBufferedDocs();
    }
    return currentValue;
}
/// <summary>
/// Determines how often segment indices are merged when documents are added.
/// With smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on unoptimized
/// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
/// for batch index creation, and smaller values (&lt; 10) for indices that are
/// interactively maintained.
/// <para>This must never be less than 2. The default value is 10.</para>
/// <para>The value is applied to the current writer (if one is open) and
/// remembered for writers created later.</para>
/// </summary>
/// <param name="mergeFactor">the new merge factor</param>
/// <seealso cref="IndexWriter.SetMergeFactor(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
public virtual void SetMergeFactor(int mergeFactor)
{
    lock (directory)
    {
        AssureOpen();

        IndexWriter activeWriter = indexWriter;
        if (activeWriter != null)
        {
            activeWriter.SetMergeFactor(mergeFactor);
        }

        // Cache the value so CreateIndexWriter can re-apply it.
        this.mergeFactor = mergeFactor;
    }
}
/// <summary>
/// Returns the merge factor reported by the index writer.
/// </summary>
/// <remarks>Switches to writer mode first, closing an open reader if necessary.</remarks>
/// <returns>the value of <c>IndexWriter.GetMergeFactor()</c></returns>
/// <seealso cref="SetMergeFactor(int)"/>
/// <exception cref="System.SystemException">thrown by <c>AssureOpen</c> if the
/// index is closed</exception>
/// <exception cref="System.IO.IOException">may be raised while switching to the
/// writer (as stated by the original documentation)</exception>
public virtual int GetMergeFactor()
{
    int currentValue;
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        currentValue = indexWriter.GetMergeFactor();
    }
    return currentValue;
}
/// <summary>
/// Closes this index, writing all pending changes to disk. Whichever of the
/// writer or reader is currently open is closed and released, and the modifier
/// is marked as closed. Runs under a lock on the directory object.
/// </summary>
/// <exception cref="System.InvalidOperationException">if the index has already
/// been closed</exception>
public virtual void Close()
{
    lock (directory)
    {
        if (!open)
        {
            // InvalidOperationException is the .NET counterpart of Java's
            // IllegalStateException. It derives from SystemException, so
            // callers that catch SystemException continue to work.
            throw new System.InvalidOperationException("Index is closed already");
        }

        if (indexWriter != null)
        {
            indexWriter.Close();
            indexWriter = null;
        }
        else if (indexReader != null)
        {
            // Guarded: both references can be null if an earlier switch between
            // reader and writer failed part-way; closing must not then fail
            // with a NullReferenceException.
            indexReader.Close();
            indexReader = null;
        }

        open = false;
    }
}
/// <summary>
/// Returns a short description of this modifier: the text <c>Index@</c>
/// followed by the string form of the directory.
/// </summary>
public override System.String ToString()
{
    return System.String.Concat("Index@", directory);
}
586 // used as an example in the javadoc:
587 public static void main(String[] args) throws IOException {
588 Analyzer analyzer = new StandardAnalyzer();
589 // create an index in /tmp/index, overwriting an existing one:
590 IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
591 Document doc = new Document();
592 doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
593 doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
594 indexModifier.addDocument(doc);
595 int deleted = indexModifier.delete(new Term("id", "1"));
596 System.out.println("Deleted " + deleted + " document");
597 indexModifier.flush();
598 System.out.println(indexModifier.docCount() + " docs in index");
599 indexModifier.close();