/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using Similarity = Lucene.Net.Search.Similarity;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using Lock = Lucene.Net.Store.Lock;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
namespace Lucene.Net.Index
{
	/// <summary>An IndexWriter creates and maintains an index.
	/// The third argument to the
	/// <a href="#IndexWriter(Lucene.Net.Store.Directory, Lucene.Net.Analysis.Analyzer, boolean)"><b>constructor</b></a>
	/// determines whether a new index is created, or whether an existing index is
	/// opened for the addition of new documents.
	/// In either case, documents are added with the <a
	/// href="#AddDocument(Lucene.Net.Documents.Document)"><b>AddDocument</b></a> method.
	/// When finished adding documents, <a href="#Close()"><b>Close</b></a> should be called.
	/// <p>If an index will not have more documents added for a while and optimal search
	/// performance is desired, then the <a href="#Optimize()"><b>Optimize</b></a>
	/// method should be called before the index is closed.</p>
	/// <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
	/// another IndexWriter on the same directory will lead to an IOException. The IOException
	/// is also thrown if an IndexReader on the same directory is used to delete documents
	/// from the index.</p>
	/// </summary>
	/// <seealso cref="IndexModifier">IndexModifier supports the important methods of IndexWriter plus deletion</seealso>
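	/// <example>A minimal usage sketch. The index path, analyzer choice, and field setup
	/// below are illustrative assumptions, not part of this class:
	/// <code>
	/// IndexWriter writer = new IndexWriter("myindex", new Lucene.Net.Analysis.Standard.StandardAnalyzer(), true);
	/// Document doc = new Document();
	/// doc.Add(new Field("contents", "aardvark zebra", Field.Store.YES, Field.Index.TOKENIZED));
	/// writer.AddDocument(doc);
	/// writer.Optimize(); // optional: only if no more adds are expected for a while
	/// writer.Close();
	/// </code>
	/// </example>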
	public class IndexWriter
	{
		private class AnonymousClassWith : Lock.With
		{
			private void InitBlock(bool create, IndexWriter enclosingInstance)
			{
				this.create = create;
				this.enclosingInstance = enclosingInstance;
			}
			private bool create;
			private IndexWriter enclosingInstance;
			public IndexWriter Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			internal AnonymousClassWith(bool create, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
			{
				InitBlock(create, enclosingInstance);
			}
			public override System.Object DoBody()
			{
				if (create)
					Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory);
				else
					Enclosing_Instance.segmentInfos.Read(Enclosing_Instance.directory);
				return null;
			}
		}
		private class AnonymousClassWith1 : Lock.With
		{
			private void InitBlock(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance)
			{
				this.segmentsToDelete = segmentsToDelete;
				this.enclosingInstance = enclosingInstance;
			}
			private System.Collections.ArrayList segmentsToDelete;
			private IndexWriter enclosingInstance;
			public IndexWriter Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			internal AnonymousClassWith1(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
			{
				InitBlock(segmentsToDelete, enclosingInstance);
			}
			public override System.Object DoBody()
			{
				Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory); // commit changes
				Enclosing_Instance.DeleteSegments(segmentsToDelete); // delete now-unused segments
				return null;
			}
		}
		private class AnonymousClassWith2 : Lock.With
		{
			private void InitBlock(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance)
			{
				this.mergedName = mergedName;
				this.filesToDelete = filesToDelete;
				this.enclosingInstance = enclosingInstance;
			}
			private System.String mergedName;
			private System.Collections.ArrayList filesToDelete;
			private IndexWriter enclosingInstance;
			public IndexWriter Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			internal AnonymousClassWith2(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
			{
				InitBlock(mergedName, filesToDelete, enclosingInstance);
			}
			public override System.Object DoBody()
			{
				// make compound file visible for SegmentReaders
				Enclosing_Instance.directory.RenameFile(mergedName + ".tmp", mergedName + ".cfs");
				// delete now unused files of segment
				Enclosing_Instance.DeleteFiles(filesToDelete);
				return null;
			}
		}
		private class AnonymousClassWith3 : Lock.With
		{
			private void InitBlock(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance)
			{
				this.segmentsToDelete = segmentsToDelete;
				this.enclosingInstance = enclosingInstance;
			}
			private System.Collections.ArrayList segmentsToDelete;
			private IndexWriter enclosingInstance;
			public IndexWriter Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			internal AnonymousClassWith3(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
			{
				InitBlock(segmentsToDelete, enclosingInstance);
			}
			public override System.Object DoBody()
			{
				Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory); // commit before deleting
				Enclosing_Instance.DeleteSegments(segmentsToDelete); // delete now-unused segments
				return null;
			}
		}
		private class AnonymousClassWith4 : Lock.With
		{
			private void InitBlock(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance)
			{
				this.mergedName = mergedName;
				this.filesToDelete = filesToDelete;
				this.enclosingInstance = enclosingInstance;
			}
			private System.String mergedName;
			private System.Collections.ArrayList filesToDelete;
			private IndexWriter enclosingInstance;
			public IndexWriter Enclosing_Instance
			{
				get
				{
					return enclosingInstance;
				}
			}
			internal AnonymousClassWith4(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
			{
				InitBlock(mergedName, filesToDelete, enclosingInstance);
			}
			public override System.Object DoBody()
			{
				// make compound file visible for SegmentReaders
				Enclosing_Instance.directory.RenameFile(mergedName + ".tmp", mergedName + ".cfs");
				// delete now unused files of segment
				Enclosing_Instance.DeleteFiles(filesToDelete);
				return null;
			}
		}
		private void InitBlock()
		{
			similarity = Similarity.GetDefault();
		}
		/// <summary> Default value is 1,000 milliseconds.</summary>
		public const long WRITE_LOCK_TIMEOUT = 1000;
		
		/// <summary> Default value is 10,000 milliseconds.</summary>
		public const long COMMIT_LOCK_TIMEOUT = 10000;
		
		public const System.String WRITE_LOCK_NAME = "write.lock";
		public const System.String COMMIT_LOCK_NAME = "commit.lock";
		
		/// <summary> Default value is 10. Change using {@link #SetMergeFactor(int)}.</summary>
		public const int DEFAULT_MERGE_FACTOR = 10;
		
		/// <summary> Default value is 10. Change using {@link #SetMaxBufferedDocs(int)}.</summary>
		public const int DEFAULT_MAX_BUFFERED_DOCS = 10;
		
		/// <deprecated> use {@link #DEFAULT_MAX_BUFFERED_DOCS} instead</deprecated>
		public static readonly int DEFAULT_MIN_MERGE_DOCS = DEFAULT_MAX_BUFFERED_DOCS;
		
		/// <summary> Default value is {@link System.Int32#MaxValue}. Change using {@link #SetMaxMergeDocs(int)}.</summary>
		public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
		
		/// <summary> Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.</summary>
		public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
		
		/// <summary> Default value is 128. Change using {@link #SetTermIndexInterval(int)}.</summary>
		public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
		private Directory directory; // where this index resides
		private Analyzer analyzer; // how to analyze text
		
		private Similarity similarity; // how to normalize
		
		private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
		private Directory ramDirectory = new RAMDirectory(); // for temp segs
		
		private int singleDocSegmentsCount = 0; // for speeding decision on merge candidates
		
		private Lock writeLock;
		
		private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
		
		/// <summary>Use compound file setting. Defaults to true, minimizing the number of
		/// files used. Setting this to false may improve indexing performance, but
		/// may also cause file handle problems.
		/// </summary>
		private bool useCompoundFile = true;
		
		private bool closeDir;
		/// <summary>Get the current setting of whether to use the compound file format.
		/// Note that this just returns the value you set with SetUseCompoundFile(bool)
		/// or the default. You cannot use this to query the status of an existing index.
		/// </summary>
		/// <seealso cref="SetUseCompoundFile(bool)"/>
		public virtual bool GetUseCompoundFile()
		{
			return useCompoundFile;
		}
		
		/// <summary>Setting to turn on usage of a compound file. When on, multiple files
		/// for each segment are merged into a single file once the segment creation
		/// is finished. This is done regardless of what directory is in use.
		/// </summary>
		public virtual void SetUseCompoundFile(bool value_Renamed)
		{
			useCompoundFile = value_Renamed;
		}
		/// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
		/// </summary>
		/// <seealso cref="Similarity.SetDefault(Similarity)"/>
		public virtual void SetSimilarity(Similarity similarity)
		{
			this.similarity = similarity;
		}
		
		/// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
		/// <p>This defaults to the current value of {@link Similarity#GetDefault()}.</p>
		/// </summary>
		public virtual Similarity GetSimilarity()
		{
			return this.similarity;
		}
		/// <summary>Expert: Set the interval between indexed terms. Large values cause less
		/// memory to be used by IndexReader, but slow random-access to terms. Small
		/// values cause more memory to be used by an IndexReader, and speed
		/// random-access to terms.
		///
		/// This parameter determines the amount of computation required per query
		/// term, regardless of the number of documents that contain that term. In
		/// particular, it is the maximum number of other terms that must be
		/// scanned before a term is located and its frequency and position information
		/// may be processed. In a large index with user-entered query terms, query
		/// processing time is likely to be dominated not by term lookup but rather
		/// by the processing of frequency and positional data. In a small index
		/// or when many uncommon query terms are generated (e.g., by wildcard
		/// queries) term lookup may become a dominant cost.
		///
		/// In particular, <code>numUniqueTerms/interval</code> terms are read into
		/// memory by an IndexReader, and, on average, <code>interval/2</code> terms
		/// must be scanned for each random term access.
		/// </summary>
		/// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL"/>
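		/// <remarks>As a worked example (illustrative numbers, not a measured figure): with
		/// 1,000,000 unique terms and the default interval of 128, an IndexReader keeps roughly
		/// 1,000,000 / 128, or about 7,813 terms in memory, and a random term access scans on
		/// average 128 / 2 = 64 terms.</remarks>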
		public virtual void SetTermIndexInterval(int interval)
		{
			this.termIndexInterval = interval;
		}
		
		/// <summary>Expert: Return the interval between indexed terms.
		/// </summary>
		/// <seealso cref="SetTermIndexInterval(int)"/>
		public virtual int GetTermIndexInterval()
		{
			return termIndexInterval;
		}
		/// <summary> Constructs an IndexWriter for the index in <code>path</code>.
		/// Text will be analyzed with <code>a</code>. If <code>create</code>
		/// is true, then a new, empty index will be created in
		/// <code>path</code>, replacing the index already there, if any.
		/// </summary>
		/// <param name="path">the path to the index directory</param>
		/// <param name="a">the analyzer to use</param>
		/// <param name="create"><code>true</code> to create the index or overwrite
		/// the existing one; <code>false</code> to append to the existing index</param>
		/// <throws> IOException if the directory cannot be read/written to, or
		/// if it does not exist, and <code>create</code> is <code>false</code>
		/// </throws>
		public IndexWriter(System.String path, Analyzer a, bool create) : this(FSDirectory.GetDirectory(path, create), a, create, true)
		{
		}
		/// <summary> Constructs an IndexWriter for the index in <code>path</code>.
		/// Text will be analyzed with <code>a</code>. If <code>create</code>
		/// is true, then a new, empty index will be created in
		/// <code>path</code>, replacing the index already there, if any.
		/// </summary>
		/// <param name="path">the path to the index directory</param>
		/// <param name="a">the analyzer to use</param>
		/// <param name="create"><code>true</code> to create the index or overwrite
		/// the existing one; <code>false</code> to append to the existing index</param>
		/// <throws> IOException if the directory cannot be read/written to, or
		/// if it does not exist, and <code>create</code> is <code>false</code>
		/// </throws>
		public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create) : this(FSDirectory.GetDirectory(path, create), a, create, true)
		{
		}
		/// <summary> Constructs an IndexWriter for the index in <code>d</code>.
		/// Text will be analyzed with <code>a</code>. If <code>create</code>
		/// is true, then a new, empty index will be created in
		/// <code>d</code>, replacing the index already there, if any.
		/// </summary>
		/// <param name="d">the index directory</param>
		/// <param name="a">the analyzer to use</param>
		/// <param name="create"><code>true</code> to create the index or overwrite
		/// the existing one; <code>false</code> to append to the existing index</param>
		/// <throws> IOException if the directory cannot be read/written to, or
		/// if it does not exist, and <code>create</code> is <code>false</code>
		/// </throws>
		public IndexWriter(Directory d, Analyzer a, bool create) : this(d, a, create, false)
		{
		}
		private IndexWriter(Directory d, Analyzer a, bool create, bool closeDir)
		{
			InitBlock();
			this.closeDir = closeDir;
			directory = d;
			analyzer = a;
			
			Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
			if (!writeLock.Obtain(WRITE_LOCK_TIMEOUT))
			// obtain write lock
			{
				throw new System.IO.IOException("Index locked for write: " + writeLock);
			}
			this.writeLock = writeLock; // save it
			
			lock (directory)
			{
				// in- & inter-process sync
				new AnonymousClassWith(create, this, directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
			}
		}
		/// <summary>Determines the largest number of documents ever merged by AddDocument().
		/// Small values (e.g., less than 10,000) are best for interactive indexing,
		/// as this limits the length of pauses while indexing to a few seconds.
		/// Larger values are best for batched indexing and speedier searches.
		///
		/// <p>The default value is {@link System.Int32#MaxValue}.</p>
		/// </summary>
		public virtual void SetMaxMergeDocs(int maxMergeDocs)
		{
			this.maxMergeDocs = maxMergeDocs;
		}
		
		/// <seealso cref="SetMaxMergeDocs"/>
		public virtual int GetMaxMergeDocs()
		{
			return maxMergeDocs;
		}
		/// <summary> The maximum number of terms that will be indexed for a single field in a
		/// document. This limits the amount of memory required for indexing, so that
		/// collections with very large files will not crash the indexing process by
		/// running out of memory.<p/>
		/// Note that this effectively truncates large documents, excluding from the
		/// index terms that occur further in the document. If you know your source
		/// documents are large, be sure to set this value high enough to accommodate
		/// the expected size. If you set it to System.Int32.MaxValue, then the only limit
		/// is your memory, but you should anticipate an OutOfMemoryException.<p/>
		/// By default, no more than 10,000 terms will be indexed for a field.
		/// </summary>
		public virtual void SetMaxFieldLength(int maxFieldLength)
		{
			this.maxFieldLength = maxFieldLength;
		}
		
		/// <seealso cref="SetMaxFieldLength"/>
		public virtual int GetMaxFieldLength()
		{
			return maxFieldLength;
		}
		/// <summary>Determines the minimal number of documents required before the buffered
		/// in-memory documents are merged and a new Segment is created.
		/// Since Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
		/// a large value gives faster indexing. At the same time, mergeFactor limits
		/// the number of files open in an FSDirectory.
		///
		/// <p> The default value is 10.</p>
		/// </summary>
		/// <throws> ArgumentException if maxBufferedDocs is smaller than 2 </throws>
		public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
		{
			if (maxBufferedDocs < 2)
				throw new System.ArgumentException("maxBufferedDocs must at least be 2");
			this.minMergeDocs = maxBufferedDocs;
		}
		
		/// <seealso cref="SetMaxBufferedDocs"/>
		public virtual int GetMaxBufferedDocs()
		{
			return minMergeDocs;
		}
		/// <summary>Determines how often segment indices are merged by AddDocument(). With
		/// smaller values, less RAM is used while indexing, and searches on
		/// unoptimized indices are faster, but indexing speed is slower. With larger
		/// values, more RAM is used during indexing, and while searches on unoptimized
		/// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
		/// for batch index creation, and smaller values (&lt; 10) for indices that are
		/// interactively maintained.
		///
		/// <p>This must never be less than 2. The default value is 10.</p>
		/// </summary>
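		/// <remarks>For example (illustrative numbers): a nightly batch rebuild might call
		/// SetMergeFactor(100) to favor indexing throughput, while an index that serves live
		/// queries as documents trickle in might call SetMergeFactor(2) to keep unoptimized
		/// searches fast at the cost of more frequent merging.</remarks>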
		public virtual void SetMergeFactor(int mergeFactor)
		{
			if (mergeFactor < 2)
				throw new System.ArgumentException("mergeFactor cannot be less than 2");
			this.mergeFactor = mergeFactor;
		}
		
		/// <seealso cref="SetMergeFactor"/>
		public virtual int GetMergeFactor()
		{
			return mergeFactor;
		}
		/// <summary>If non-null, information about merges and a message when
		/// maxFieldLength is reached will be printed to this.
		/// </summary>
		public virtual void SetInfoStream(System.IO.TextWriter infoStream)
		{
			this.infoStream = infoStream;
		}
		
		/// <seealso cref="SetInfoStream"/>
		public virtual System.IO.TextWriter GetInfoStream()
		{
			return infoStream;
		}
		/// <summary>Flushes all changes to an index and closes all associated files. </summary>
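		/// <example>A typical calling pattern (a sketch; <c>dir</c>, <c>analyzer</c>, and
		/// <c>doc</c> are assumed to exist) keeps Close() in a finally block so the write
		/// lock is always released:
		/// <code>
		/// IndexWriter writer = new IndexWriter(dir, analyzer, true);
		/// try
		/// {
		///     writer.AddDocument(doc);
		/// }
		/// finally
		/// {
		///     writer.Close();
		/// }
		/// </code>
		/// </example>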
		public virtual void Close()
		{
			lock (this)
			{
				FlushRamSegments();
				ramDirectory.Close();
				if (writeLock != null)
				{
					writeLock.Release(); // release write lock
					writeLock = null;
				}
				if (closeDir)
					directory.Close();
				System.GC.SuppressFinalize(this);
			}
		}
		
		/// <summary>Release the write lock, if needed. </summary>
		~IndexWriter()
		{
			if (writeLock != null)
			{
				writeLock.Release(); // release write lock
				writeLock = null;
			}
		}
		/// <summary>Returns the Directory used by this index. </summary>
		public virtual Directory GetDirectory()
		{
			return directory;
		}
		
		/// <summary>Returns the analyzer used by this index. </summary>
		public virtual Analyzer GetAnalyzer()
		{
			return analyzer;
		}
		/// <summary>Returns the number of documents currently in this index. </summary>
		public virtual int DocCount()
		{
			lock (this)
			{
				int count = 0;
				for (int i = 0; i < segmentInfos.Count; i++)
				{
					SegmentInfo si = segmentInfos.Info(i);
					count += si.docCount;
				}
				return count;
			}
		}
		/// <summary> The maximum number of terms that will be indexed for a single field in a
		/// document. This limits the amount of memory required for indexing, so that
		/// collections with very large files will not crash the indexing process by
		/// running out of memory.<p/>
		/// Note that this effectively truncates large documents, excluding from the
		/// index terms that occur further in the document. If you know your source
		/// documents are large, be sure to set this value high enough to accommodate
		/// the expected size. If you set it to System.Int32.MaxValue, then the only limit
		/// is your memory, but you should anticipate an OutOfMemoryException.<p/>
		/// By default, no more than 10,000 terms will be indexed for a field.
		/// </summary>
		/// <deprecated> use {@link #SetMaxFieldLength} instead</deprecated>
		public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
		/// <summary> Adds a document to this index. If the document contains more than
		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
		/// discarded.
		/// </summary>
		public virtual void AddDocument(Document doc)
		{
			AddDocument(doc, analyzer);
		}
		/// <summary> Adds a document to this index, using the provided analyzer instead of the
		/// value of {@link #GetAnalyzer()}. If the document contains more than
		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
		/// discarded.
		/// </summary>
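		/// <example>A sketch of overriding the analyzer for a single document (WhitespaceAnalyzer
		/// from Lucene.Net.Analysis is an illustrative choice; <c>plainDoc</c> and <c>codeDoc</c>
		/// are assumed to exist):
		/// <code>
		/// writer.AddDocument(plainDoc);                          // uses the writer's analyzer
		/// writer.AddDocument(codeDoc, new WhitespaceAnalyzer()); // per-document override
		/// </code>
		/// </example>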
		public virtual void AddDocument(Document doc, Analyzer analyzer)
		{
			DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
			dw.SetInfoStream(infoStream);
			System.String segmentName = NewSegmentName();
			dw.AddDocument(segmentName, doc);
			lock (this)
			{
				segmentInfos.Add(new SegmentInfo(segmentName, 1, ramDirectory));
				singleDocSegmentsCount++;
				MaybeMergeSegments();
			}
		}
		internal int GetSegmentsCounter()
		{
			return segmentInfos.counter;
		}
		
		private System.String NewSegmentName()
		{
			lock (this)
			{
				return "_" + SupportClass.Number.ToString(segmentInfos.counter++, SupportClass.Number.MAX_RADIX);
			}
		}
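		// The method above renders the counter in the maximum radix: assuming
		// SupportClass.Number.MAX_RADIX is 36 (mirroring Java's Character.MAX_RADIX),
		// counter 0 yields "_0", 10 yields "_a", and 36 yields "_10".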
		/// <summary>Determines how often segment indices are merged by AddDocument(). With
		/// smaller values, less RAM is used while indexing, and searches on
		/// unoptimized indices are faster, but indexing speed is slower. With larger
		/// values, more RAM is used during indexing, and while searches on unoptimized
		/// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
		/// for batch index creation, and smaller values (&lt; 10) for indices that are
		/// interactively maintained.
		///
		/// <p>This must never be less than 2. The default value is 10.</p>
		/// </summary>
		/// <deprecated> use {@link #SetMergeFactor} instead</deprecated>
		public int mergeFactor = DEFAULT_MERGE_FACTOR;
		
		/// <summary>Determines the minimal number of documents required before the buffered
		/// in-memory documents are merged and a new Segment is created.
		/// Since Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
		/// a large value gives faster indexing. At the same time, mergeFactor limits
		/// the number of files open in an FSDirectory.
		///
		/// <p> The default value is 10.</p>
		/// </summary>
		/// <deprecated> use {@link #SetMaxBufferedDocs} instead</deprecated>
		public int minMergeDocs = DEFAULT_MIN_MERGE_DOCS;
		
		/// <summary>Determines the largest number of documents ever merged by AddDocument().
		/// Small values (e.g., less than 10,000) are best for interactive indexing,
		/// as this limits the length of pauses while indexing to a few seconds.
		/// Larger values are best for batched indexing and speedier searches.
		///
		/// <p>The default value is {@link System.Int32#MaxValue}.</p>
		/// </summary>
		/// <deprecated> use {@link #SetMaxMergeDocs} instead</deprecated>
		public int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
		
		/// <summary>If non-null, information about merges will be printed to this.</summary>
		/// <deprecated> use {@link #SetInfoStream} instead</deprecated>
		public System.IO.TextWriter infoStream = null;
		/// <summary>Merges all segments together into a single segment, optimizing an index
		/// for search.
		/// </summary>
		public virtual void Optimize()
		{
			lock (this)
			{
				FlushRamSegments();
				while (segmentInfos.Count > 1 || (segmentInfos.Count == 1 && (SegmentReader.HasDeletions(segmentInfos.Info(0)) || segmentInfos.Info(0).dir != directory || (useCompoundFile && (!SegmentReader.UsesCompoundFile(segmentInfos.Info(0)) || SegmentReader.HasSeparateNorms(segmentInfos.Info(0)))))))
				{
					int minSegment = segmentInfos.Count - mergeFactor;
					MergeSegments(minSegment < 0 ? 0 : minSegment);
				}
			}
		}
		/// <summary>Merges all segments from an array of indexes into this index.
		///
		/// <p>This may be used to parallelize batch indexing. A large document
		/// collection can be broken into sub-collections. Each sub-collection can be
		/// indexed in parallel, on a different thread, process or machine. The
		/// complete index can then be created by merging sub-collection indexes
		/// with this method.</p>
		///
		/// <p>After this completes, the index is optimized.</p>
		/// </summary>
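		/// <example>A sketch of the pattern (the part paths, combined path, and
		/// <c>analyzer</c> are illustrative assumptions):
		/// <code>
		/// // each worker produced its own index under part0/, part1/, ...
		/// Directory[] parts = new Directory[] {
		///     FSDirectory.GetDirectory("part0", false),
		///     FSDirectory.GetDirectory("part1", false)
		/// };
		/// IndexWriter writer = new IndexWriter("combined", analyzer, true);
		/// writer.AddIndexes(parts); // merges, then optimizes
		/// writer.Close();
		/// </code>
		/// </example>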
		public virtual void AddIndexes(Directory[] dirs)
		{
			lock (this)
			{
				Optimize(); // start with zero or 1 seg
				
				int start = segmentInfos.Count;
				
				for (int i = 0; i < dirs.Length; i++)
				{
					SegmentInfos sis = new SegmentInfos(); // read infos from dir
					sis.Read(dirs[i]);
					for (int j = 0; j < sis.Count; j++)
					{
						segmentInfos.Add(sis.Info(j)); // add each info
					}
				}
				
				// merge newly added segments in log(n) passes
				while (segmentInfos.Count > start + mergeFactor)
				{
					for (int base_Renamed = start + 1; base_Renamed < segmentInfos.Count; base_Renamed++)
					{
						int end = System.Math.Min(segmentInfos.Count, base_Renamed + mergeFactor);
						if (end - base_Renamed > 1)
							MergeSegments(base_Renamed, end);
					}
				}
				
				Optimize(); // final cleanup
			}
		}
		/// <summary>Merges the provided indexes into this index.
		/// <p>After this completes, the index is optimized.</p>
		/// <p>The provided IndexReaders are not closed.</p>
		/// </summary>
		public virtual void AddIndexes(IndexReader[] readers)
		{
			lock (this)
			{
				Optimize(); // start with zero or 1 seg
				
				System.String mergedName = NewSegmentName();
				SegmentMerger merger = new SegmentMerger(this, mergedName);
				
				System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
				IndexReader sReader = null;
				if (segmentInfos.Count == 1)
				{
					// add existing index, if any
					sReader = SegmentReader.Get(segmentInfos.Info(0));
					merger.Add(sReader);
					segmentsToDelete.Add(sReader); // queue segment for deletion
				}
				
				for (int i = 0; i < readers.Length; i++)
				{
					// add new indexes
					merger.Add(readers[i]);
				}
				
				int docCount = merger.Merge(); // merge 'em
				
				segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new
				segmentInfos.Add(new SegmentInfo(mergedName, docCount, directory));
				
				if (sReader != null)
					sReader.Close();
				
				lock (directory)
				{
					// in- & inter-process sync
					new AnonymousClassWith1(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
				}
				
				if (useCompoundFile)
				{
					System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp");
					lock (directory)
					{
						// in- & inter-process sync
						new AnonymousClassWith2(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
					}
				}
			}
		}
		/// <summary>Merges all RAM-resident segments. </summary>
		private void FlushRamSegments()
		{
			int minSegment = segmentInfos.Count - 1;
			int docCount = 0;
			while (minSegment >= 0 && (segmentInfos.Info(minSegment)).dir == ramDirectory)
			{
				docCount += segmentInfos.Info(minSegment).docCount;
				minSegment--;
			}
			if (minSegment < 0 || (docCount + segmentInfos.Info(minSegment).docCount) > mergeFactor || !(segmentInfos.Info(segmentInfos.Count - 1).dir == ramDirectory))
				minSegment++;
			if (minSegment >= segmentInfos.Count)
				return; // none to merge
			MergeSegments(minSegment);
		}
		/// <summary>Incremental segment merger. </summary>
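		/// <remarks>With the defaults (minMergeDocs = 10, mergeFactor = 10), targetMergeDocs
		/// takes the values 10, 100, 1000, ... so merges cascade: ten 1-doc segments become a
		/// 10-doc segment, ten 10-doc segments become a 100-doc segment, and so on, up to
		/// maxMergeDocs.</remarks>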
		private void MaybeMergeSegments()
		{
			long targetMergeDocs = minMergeDocs;
			while (targetMergeDocs <= maxMergeDocs)
			{
				// find segments smaller than current target size
				int minSegment = segmentInfos.Count - singleDocSegmentsCount; // top 1-doc segments are taken for sure
				int mergeDocs = singleDocSegmentsCount;
				while (--minSegment >= 0)
				{
					SegmentInfo si = segmentInfos.Info(minSegment);
					if (si.docCount >= targetMergeDocs)
						break;
					mergeDocs += si.docCount;
				}
				
				if (mergeDocs >= targetMergeDocs)
				{
					// found a merge to do
					MergeSegments(minSegment + 1);
					singleDocSegmentsCount = 0;
				}
				else
					break;
				
				targetMergeDocs *= mergeFactor; // increase target size
			}
		}
		/// <summary>Pops segments off of segmentInfos stack down to minSegment, merges them,
		/// and pushes the merged index onto the top of the segmentInfos stack.
		/// </summary>
		private void MergeSegments(int minSegment)
		{
			MergeSegments(minSegment, segmentInfos.Count);
		}
		/// <summary>Merges the named range of segments, replacing them in the stack with a
		/// single merged segment.
		/// </summary>
		private void MergeSegments(int minSegment, int end)
		{
			System.String mergedName = NewSegmentName();
			if (infoStream != null)
				infoStream.Write("merging segments");
			SegmentMerger merger = new SegmentMerger(this, mergedName);
			
			System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			for (int i = minSegment; i < end; i++)
			{
				SegmentInfo si = segmentInfos.Info(i);
				if (infoStream != null)
					infoStream.Write(" " + si.name + " (" + si.docCount + " docs)");
				IndexReader reader = SegmentReader.Get(si);
				merger.Add(reader);
				if ((reader.Directory() == this.directory) || (reader.Directory() == this.ramDirectory))
					segmentsToDelete.Add(reader); // queue segment for deletion
			}
			
			int mergedDocCount = merger.Merge();
			
			if (infoStream != null)
			{
				infoStream.WriteLine(" into " + mergedName + " (" + mergedDocCount + " docs)");
			}
			
			for (int i = end - 1; i >= minSegment; i--)
			{
				// remove old infos & add new
				segmentInfos.RemoveAt(i);
			}
			segmentInfos.Add(new SegmentInfo(mergedName, mergedDocCount, directory));
			
			// close readers before we attempt to delete now-obsolete segments
			merger.CloseReaders();
			
			lock (directory)
			{
				// in- & inter-process sync
				new AnonymousClassWith3(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
			}
			
			if (useCompoundFile)
			{
				System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp");
				lock (directory)
				{
					// in- & inter-process sync
					new AnonymousClassWith4(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
				}
			}
		}
		/*
		 * Some operating systems (e.g. Windows) don't permit a file to be deleted
		 * while it is opened for read (e.g. by another process or thread). So we
		 * assume that when a delete fails it is because the file is open in another
		 * process, and queue the file for subsequent deletion.
		 */
		private void DeleteSegments(System.Collections.ArrayList segments)
		{
			System.Collections.ArrayList deletable = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			
			DeleteFiles(ReadDeleteableFiles(), deletable); // try to delete deleteable
			
			for (int i = 0; i < segments.Count; i++)
			{
				SegmentReader reader = (SegmentReader) segments[i];
				if (reader.Directory() == this.directory)
					DeleteFiles(reader.Files(), deletable); // try to delete our files
				else
					DeleteFiles(reader.Files(), reader.Directory()); // delete other files
			}
			
			WriteDeleteableFiles(deletable); // note files we can't delete
		}
		private void DeleteFiles(System.Collections.ArrayList files)
		{
			System.Collections.ArrayList deletable = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			DeleteFiles(ReadDeleteableFiles(), deletable); // try to delete deleteable
			DeleteFiles(files, deletable); // try to delete our files
			WriteDeleteableFiles(deletable); // note files we can't delete
		}
		private void DeleteFiles(System.Collections.ArrayList files, Directory directory)
		{
			for (int i = 0; i < files.Count; i++)
			{
				directory.DeleteFile((System.String) files[i]);
			}
		}
		private void DeleteFiles(System.Collections.ArrayList files, System.Collections.ArrayList deletable)
		{
			for (int i = 0; i < files.Count; i++)
			{
				System.String file = (System.String) files[i];
				try
				{
					directory.DeleteFile(file); // try to delete each file
				}
				catch (System.IO.IOException e)
				{
					// if delete fails
					if (directory.FileExists(file))
					{
						if (infoStream != null)
						{
							infoStream.WriteLine(e.ToString() + "; Will re-try later.");
						}
						deletable.Add(file); // add to deletable
					}
				}
			}
		}
		private System.Collections.ArrayList ReadDeleteableFiles()
		{
			System.Collections.ArrayList result = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			if (!directory.FileExists(IndexFileNames.DELETABLE))
				return result;
			
			IndexInput input = directory.OpenInput(IndexFileNames.DELETABLE);
			try
			{
				for (int i = input.ReadInt(); i > 0; i--)
				{
					// read file names
					result.Add(input.ReadString());
				}
			}
			finally
			{
				input.Close();
			}
			return result;
		}
		private void WriteDeleteableFiles(System.Collections.ArrayList files)
		{
			IndexOutput output = directory.CreateOutput("deleteable.new");
			try
			{
				output.WriteInt(files.Count);
				for (int i = 0; i < files.Count; i++)
				{
					output.WriteString((System.String) files[i]);
				}
			}
			finally
			{
				output.Close();
			}
			
			directory.RenameFile("deleteable.new", IndexFileNames.DELETABLE);
		}
	}
}