[beagle.git] / beagled / Lucene.Net / Index / IndexWriter.cs
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using Similarity = Lucene.Net.Search.Similarity;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using Lock = Lucene.Net.Store.Lock;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
namespace Lucene.Net.Index
{
    
    /// <summary>An IndexWriter creates and maintains an index.
    /// The third argument to the
    /// <a href="#IndexWriter(Lucene.Net.store.Directory, Lucene.Net.analysis.Analyzer, boolean)"><b>constructor</b></a>
    /// determines whether a new index is created, or whether an existing index is
    /// opened for the addition of new documents.
    /// In either case, documents are added with the <a
    /// href="#addDocument(Lucene.Net.document.Document)"><b>addDocument</b></a> method.
    /// When finished adding documents, <a href="#close()"><b>close</b></a> should be called.
    /// <p>If an index will not have more documents added for a while and optimal search
    /// performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
    /// method should be called before the index is closed.</p>
    /// <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
    /// another IndexWriter on the same directory will lead to an IOException. An IOException
    /// is also thrown if an IndexReader on the same directory is used to delete documents
    /// from the index.</p>
    /// </summary>
    /// <seealso cref="IndexModifier">IndexModifier supports the important methods of IndexWriter plus deletion.</seealso>
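    /// <example>A minimal usage sketch. The index path "/tmp/index" and the
    /// StandardAnalyzer are placeholders, and the Field construction assumes
    /// the Lucene 1.9-era Field API; adjust to the surrounding application:
    /// <code>
    /// IndexWriter writer = new IndexWriter("/tmp/index", new StandardAnalyzer(), true);
    /// Document doc = new Document();
    /// doc.Add(new Field("contents", "an example document", Field.Store.YES, Field.Index.TOKENIZED));
    /// writer.AddDocument(doc);
    /// writer.Optimize(); // optional: merge to one segment for faster searches
    /// writer.Close();
    /// </code>
    /// </example>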
    public class IndexWriter
    {
        // Lock.With helper that, while the commit lock is held, writes the
        // segments file (on create) or reads it (on open of an existing index).
        private class AnonymousClassWith : Lock.With
        {
            private void InitBlock(bool create, IndexWriter enclosingInstance)
            {
                this.create = create;
                this.enclosingInstance = enclosingInstance;
            }
            private bool create;
            private IndexWriter enclosingInstance;
            public IndexWriter Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            internal AnonymousClassWith(bool create, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
            {
                InitBlock(create, enclosingInstance);
            }
            public override System.Object DoBody()
            {
                if (create)
                    Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory);
                else
                    Enclosing_Instance.segmentInfos.Read(Enclosing_Instance.directory);
                return null;
            }
        }
        // Lock.With helper that commits the new segments file and then deletes
        // the now-unused segments, all while the commit lock is held.
        private class AnonymousClassWith1 : Lock.With
        {
            private void InitBlock(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance)
            {
                this.segmentsToDelete = segmentsToDelete;
                this.enclosingInstance = enclosingInstance;
            }
            private System.Collections.ArrayList segmentsToDelete;
            private IndexWriter enclosingInstance;
            public IndexWriter Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            internal AnonymousClassWith1(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
            {
                InitBlock(segmentsToDelete, enclosingInstance);
            }
            public override System.Object DoBody()
            {
                Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory); // commit changes
                Enclosing_Instance.DeleteSegments(segmentsToDelete); // delete now-unused segments
                return null;
            }
        }
        // Lock.With helper that renames a finished compound file from ".tmp"
        // to ".cfs" and deletes the segment's now-unused loose files.
        private class AnonymousClassWith2 : Lock.With
        {
            private void InitBlock(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance)
            {
                this.mergedName = mergedName;
                this.filesToDelete = filesToDelete;
                this.enclosingInstance = enclosingInstance;
            }
            private System.String mergedName;
            private System.Collections.ArrayList filesToDelete;
            private IndexWriter enclosingInstance;
            public IndexWriter Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            internal AnonymousClassWith2(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
            {
                InitBlock(mergedName, filesToDelete, enclosingInstance);
            }
            public override System.Object DoBody()
            {
                // make compound file visible for SegmentReaders
                Enclosing_Instance.directory.RenameFile(mergedName + ".tmp", mergedName + ".cfs");
                // delete now-unused files of segment
                Enclosing_Instance.DeleteFiles(filesToDelete);
                return null;
            }
        }
        // Same pattern as AnonymousClassWith1, used from MergeSegments:
        // commit the segments file, then delete the segments merged away.
        private class AnonymousClassWith3 : Lock.With
        {
            private void InitBlock(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance)
            {
                this.segmentsToDelete = segmentsToDelete;
                this.enclosingInstance = enclosingInstance;
            }
            private System.Collections.ArrayList segmentsToDelete;
            private IndexWriter enclosingInstance;
            public IndexWriter Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            internal AnonymousClassWith3(System.Collections.ArrayList segmentsToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
            {
                InitBlock(segmentsToDelete, enclosingInstance);
            }
            public override System.Object DoBody()
            {
                Enclosing_Instance.segmentInfos.Write(Enclosing_Instance.directory); // commit before deleting
                Enclosing_Instance.DeleteSegments(segmentsToDelete); // delete now-unused segments
                return null;
            }
        }
        // Same pattern as AnonymousClassWith2, used from MergeSegments.
        private class AnonymousClassWith4 : Lock.With
        {
            private void InitBlock(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance)
            {
                this.mergedName = mergedName;
                this.filesToDelete = filesToDelete;
                this.enclosingInstance = enclosingInstance;
            }
            private System.String mergedName;
            private System.Collections.ArrayList filesToDelete;
            private IndexWriter enclosingInstance;
            public IndexWriter Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            internal AnonymousClassWith4(System.String mergedName, System.Collections.ArrayList filesToDelete, IndexWriter enclosingInstance, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
            {
                InitBlock(mergedName, filesToDelete, enclosingInstance);
            }
            public override System.Object DoBody()
            {
                // make compound file visible for SegmentReaders
                Enclosing_Instance.directory.RenameFile(mergedName + ".tmp", mergedName + ".cfs");
                // delete now-unused files of segment
                Enclosing_Instance.DeleteFiles(filesToDelete);
                return null;
            }
        }
        private void InitBlock()
        {
            similarity = Similarity.GetDefault();
        }
        
        /// <summary> Default value is 1,000 milliseconds.</summary>
        public const long WRITE_LOCK_TIMEOUT = 1000;
        
        /// <summary> Default value is 10,000 milliseconds.</summary>
        public const long COMMIT_LOCK_TIMEOUT = 10000;
        
        public const System.String WRITE_LOCK_NAME = "write.lock";
        public const System.String COMMIT_LOCK_NAME = "commit.lock";
        
        /// <summary> Default value is 10. Change using {@link #SetMergeFactor(int)}.</summary>
        public const int DEFAULT_MERGE_FACTOR = 10;
        
        /// <summary> Default value is 10. Change using {@link #SetMaxBufferedDocs(int)}.</summary>
        public const int DEFAULT_MAX_BUFFERED_DOCS = 10;
        
        /// <deprecated> use {@link #DEFAULT_MAX_BUFFERED_DOCS} instead
        /// </deprecated>
        public static readonly int DEFAULT_MIN_MERGE_DOCS = DEFAULT_MAX_BUFFERED_DOCS;
        
        /// <summary> Default value is System.Int32.MaxValue. Change using {@link #SetMaxMergeDocs(int)}.</summary>
        public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
        
        /// <summary> Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.</summary>
        public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
        
        /// <summary> Default value is 128. Change using {@link #SetTermIndexInterval(int)}.</summary>
        public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
        private Directory directory; // where this index resides
        private Analyzer analyzer; // how to analyze text
        
        private Similarity similarity; // how to normalize
        
        private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
        private Directory ramDirectory = new RAMDirectory(); // for temp segs
        
        private int singleDocSegmentsCount = 0; // for speeding decision on merge candidates
        
        private Lock writeLock;
        
        private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
        
        /// <summary>Use compound file setting. Defaults to true, minimizing the number of
        /// files used. Setting this to false may improve indexing performance, but
        /// may also cause file handle problems.
        /// </summary>
        private bool useCompoundFile = true;
        
        private bool closeDir;
        /// <summary>Get the current setting of whether to use the compound file format.
        /// Note that this just returns the value you set with SetUseCompoundFile(bool),
        /// or the default; it cannot be used to query the status of an existing index.
        /// </summary>
        /// <seealso cref="SetUseCompoundFile(bool)">
        /// </seealso>
        public virtual bool GetUseCompoundFile()
        {
            return useCompoundFile;
        }
        
        /// <summary>Setting to turn on usage of a compound file. When on, the multiple files
        /// for each segment are merged into a single file once segment creation
        /// is finished. This is done regardless of what directory is in use.
        /// </summary>
        public virtual void SetUseCompoundFile(bool value_Renamed)
        {
            useCompoundFile = value_Renamed;
        }
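        
        // Design note: with the compound format each freshly written segment is
        // packed into a single ".cfs" file (see the ".tmp" -> ".cfs" rename in
        // the helpers above), trading a little indexing time for far fewer open
        // file handles at search time.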
        /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
        /// </summary>
        /// <seealso cref="Similarity.SetDefault(Similarity)">
        /// </seealso>
        public virtual void SetSimilarity(Similarity similarity)
        {
            this.similarity = similarity;
        }
        
        /// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
        ///
        /// <p>This defaults to the current value of {@link Similarity#GetDefault()}.</p>
        /// </summary>
        public virtual Similarity GetSimilarity()
        {
            return this.similarity;
        }
        /// <summary>Expert: Set the interval between indexed terms. Large values cause less
        /// memory to be used by an IndexReader, but slow random access to terms. Small
        /// values cause more memory to be used by an IndexReader, and speed up
        /// random access to terms.
        ///
        /// This parameter determines the amount of computation required per query
        /// term, regardless of the number of documents that contain that term. In
        /// particular, it is the maximum number of other terms that must be
        /// scanned before a term is located and its frequency and position information
        /// may be processed. In a large index with user-entered query terms, query
        /// processing time is likely to be dominated not by term lookup but rather
        /// by the processing of frequency and positional data. In a small index,
        /// or when many uncommon query terms are generated (e.g., by wildcard
        /// queries), term lookup may become a dominant cost.
        ///
        /// In particular, <code>numUniqueTerms/interval</code> terms are read into
        /// memory by an IndexReader, and, on average, <code>interval/2</code> terms
        /// must be scanned for each random term access.
        ///
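        /// For example (illustrative numbers): with 1,000,000 unique terms and the
        /// default interval of 128, an IndexReader holds roughly 7,800 terms in
        /// memory, and a random lookup scans about 64 terms on average.
        ///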
        /// </summary>
        /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
        /// </seealso>
        public virtual void SetTermIndexInterval(int interval)
        {
            this.termIndexInterval = interval;
        }
        
        /// <summary>Expert: Return the interval between indexed terms.
        /// </summary>
        /// <seealso cref="SetTermIndexInterval(int)">
        /// </seealso>
        public virtual int GetTermIndexInterval()
        {
            return termIndexInterval;
        }
        /// <summary> Constructs an IndexWriter for the index in <code>path</code>.
        /// Text will be analyzed with <code>a</code>. If <code>create</code>
        /// is true, then a new, empty index will be created in
        /// <code>path</code>, replacing any index already there.
        /// </summary>
        /// <param name="path">the path to the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><code>true</code> to create the index or overwrite
        /// the existing one; <code>false</code> to append to the existing
        /// index
        /// </param>
        /// <throws> IOException if the directory cannot be read from or written to,
        /// or if it does not exist and <code>create</code> is <code>false</code>
        /// </throws>
        public IndexWriter(System.String path, Analyzer a, bool create) : this(FSDirectory.GetDirectory(path, create), a, create, true)
        {
        }
        /// <summary> Constructs an IndexWriter for the index in <code>path</code>.
        /// Text will be analyzed with <code>a</code>. If <code>create</code>
        /// is true, then a new, empty index will be created in
        /// <code>path</code>, replacing any index already there.
        /// </summary>
        /// <param name="path">the path to the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><code>true</code> to create the index or overwrite
        /// the existing one; <code>false</code> to append to the existing
        /// index
        /// </param>
        /// <throws> IOException if the directory cannot be read from or written to,
        /// or if it does not exist and <code>create</code> is <code>false</code>
        /// </throws>
        public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create) : this(FSDirectory.GetDirectory(path, create), a, create, true)
        {
        }
        /// <summary> Constructs an IndexWriter for the index in <code>d</code>.
        /// Text will be analyzed with <code>a</code>. If <code>create</code>
        /// is true, then a new, empty index will be created in
        /// <code>d</code>, replacing any index already there.
        /// </summary>
        /// <param name="d">the index directory
        /// </param>
        /// <param name="a">the analyzer to use
        /// </param>
        /// <param name="create"><code>true</code> to create the index or overwrite
        /// the existing one; <code>false</code> to append to the existing
        /// index
        /// </param>
        /// <throws> IOException if the directory cannot be read from or written to,
        /// or if it does not exist and <code>create</code> is <code>false</code>
        /// </throws>
        public IndexWriter(Directory d, Analyzer a, bool create) : this(d, a, create, false)
        {
        }
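        
        // Example (sketch): append to an existing on-disk index, creating it
        // first if it is not there yet. "indexPath" and "analyzer" are
        // placeholders; IndexReader.IndexExists is part of this library.
        //
        //   bool exists = IndexReader.IndexExists(indexPath);
        //   IndexWriter writer = new IndexWriter(indexPath, analyzer, !exists);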
        private IndexWriter(Directory d, Analyzer a, bool create, bool closeDir)
        {
            InitBlock();
            this.closeDir = closeDir;
            directory = d;
            analyzer = a;
            
            Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
            if (!writeLock.Obtain(WRITE_LOCK_TIMEOUT))
            {
                // obtain write lock
                throw new System.IO.IOException("Index locked for write: " + writeLock);
            }
            this.writeLock = writeLock; // save it
            
            lock (directory)
            {
                // in- & inter-process sync
                new AnonymousClassWith(create, this, directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
            }
        }
        /// <summary>Determines the largest number of documents ever merged by addDocument().
        /// Small values (e.g., less than 10,000) are best for interactive indexing,
        /// as this limits the length of pauses while indexing to a few seconds.
        /// Larger values are best for batched indexing and speedier searches.
        ///
        /// <p>The default value is System.Int32.MaxValue.</p>
        /// </summary>
        public virtual void SetMaxMergeDocs(int maxMergeDocs)
        {
            this.maxMergeDocs = maxMergeDocs;
        }
        
        /// <seealso cref="SetMaxMergeDocs">
        /// </seealso>
        public virtual int GetMaxMergeDocs()
        {
            return maxMergeDocs;
        }
        /// <summary> The maximum number of terms that will be indexed for a single field in a
        /// document. This limits the amount of memory required for indexing, so that
        /// collections with very large files will not crash the indexing process by
        /// running out of memory.<p/>
        /// Note that this effectively truncates large documents, excluding from the
        /// index terms that occur further in the document. If you know your source
        /// documents are large, be sure to set this value high enough to accommodate
        /// the expected size. If you set it to System.Int32.MaxValue, then the only limit
        /// is your memory, but you should anticipate a System.OutOfMemoryException.<p/>
        /// By default, no more than 10,000 terms will be indexed for a field.
        /// </summary>
        public virtual void SetMaxFieldLength(int maxFieldLength)
        {
            this.maxFieldLength = maxFieldLength;
        }
        
        /// <seealso cref="SetMaxFieldLength">
        /// </seealso>
        public virtual int GetMaxFieldLength()
        {
            return maxFieldLength;
        }
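        
        // Example (sketch): lift the truncation limit entirely for very large
        // documents, at the cost of memory during indexing.
        //
        //   writer.SetMaxFieldLength(System.Int32.MaxValue);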
        /// <summary>Determines the minimal number of documents required before the buffered
        /// in-memory documents are merged and a new segment is created.
        /// Since documents are buffered in a {@link Lucene.Net.store.RAMDirectory},
        /// a large value gives faster indexing. At the same time, mergeFactor limits
        /// the number of files open in an FSDirectory.
        ///
        /// <p>The default value is 10.</p>
        /// </summary>
        /// <throws> ArgumentException if maxBufferedDocs is smaller than 2 </throws>
        public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
        {
            if (maxBufferedDocs < 2)
                throw new System.ArgumentException("maxBufferedDocs must be at least 2");
            this.minMergeDocs = maxBufferedDocs;
        }
        
        /// <seealso cref="SetMaxBufferedDocs">
        /// </seealso>
        public virtual int GetMaxBufferedDocs()
        {
            return minMergeDocs;
        }
        /// <summary>Determines how often segment indices are merged by addDocument(). With
        /// smaller values, less RAM is used while indexing, and searches on
        /// unoptimized indices are faster, but indexing speed is slower. With larger
        /// values, more RAM is used during indexing, and while searches on unoptimized
        /// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
        /// for batch index creation, and smaller values (&lt; 10) for indices that are
        /// interactively maintained.
        ///
        /// <p>This must never be less than 2. The default value is 10.</p>
        /// </summary>
        public virtual void SetMergeFactor(int mergeFactor)
        {
            if (mergeFactor < 2)
                throw new System.ArgumentException("mergeFactor cannot be less than 2");
            this.mergeFactor = mergeFactor;
        }
        
        /// <seealso cref="SetMergeFactor">
        /// </seealso>
        public virtual int GetMergeFactor()
        {
            return mergeFactor;
        }
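        
        // Tuning sketch (illustrative values): favor batch-indexing throughput
        // at the cost of RAM and open file handles.
        //
        //   writer.SetMergeFactor(30);       // merge less often
        //   writer.SetMaxBufferedDocs(1000); // buffer more docs in RAM first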
        /// <summary>If non-null, information about merges and a message when
        /// maxFieldLength is reached will be printed to this.
        /// </summary>
        public virtual void SetInfoStream(System.IO.TextWriter infoStream)
        {
            this.infoStream = infoStream;
        }
        
        /// <seealso cref="SetInfoStream">
        /// </seealso>
        public virtual System.IO.TextWriter GetInfoStream()
        {
            return infoStream;
        }
        /// <summary>Flushes all changes to an index and closes all associated files. </summary>
        public virtual void Close()
        {
            lock (this)
            {
                FlushRamSegments();
                ramDirectory.Close();
                if (writeLock != null)
                {
                    writeLock.Release(); // release write lock
                    writeLock = null;
                }
                if (closeDir)
                    directory.Close();
                System.GC.SuppressFinalize(this);
            }
        }
        
        /// <summary>Release the write lock, if needed. </summary>
        ~IndexWriter()
        {
            if (writeLock != null)
            {
                writeLock.Release(); // release write lock
                writeLock = null;
            }
        }
        /// <summary>Returns the Directory used by this index. </summary>
        public virtual Directory GetDirectory()
        {
            return directory;
        }
        
        /// <summary>Returns the analyzer used by this index. </summary>
        public virtual Analyzer GetAnalyzer()
        {
            return analyzer;
        }
        
        /// <summary>Returns the number of documents currently in this index. </summary>
        public virtual int DocCount()
        {
            lock (this)
            {
                int count = 0;
                for (int i = 0; i < segmentInfos.Count; i++)
                {
                    SegmentInfo si = segmentInfos.Info(i);
                    count += si.docCount;
                }
                return count;
            }
        }
        /// <summary> The maximum number of terms that will be indexed for a single field in a
        /// document. This limits the amount of memory required for indexing, so that
        /// collections with very large files will not crash the indexing process by
        /// running out of memory.<p/>
        /// Note that this effectively truncates large documents, excluding from the
        /// index terms that occur further in the document. If you know your source
        /// documents are large, be sure to set this value high enough to accommodate
        /// the expected size. If you set it to System.Int32.MaxValue, then the only limit
        /// is your memory, but you should anticipate a System.OutOfMemoryException.<p/>
        /// By default, no more than 10,000 terms will be indexed for a field.
        /// </summary>
        /// <deprecated> use {@link #SetMaxFieldLength} instead
        /// </deprecated>
        public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
        /// <summary> Adds a document to this index. If the document contains more than
        /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
        /// discarded.
        /// </summary>
        public virtual void AddDocument(Document doc)
        {
            AddDocument(doc, analyzer);
        }
        
        /// <summary> Adds a document to this index, using the provided analyzer instead of the
        /// value of {@link #GetAnalyzer()}. If the document contains more than
        /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
        /// discarded.
        /// </summary>
        public virtual void AddDocument(Document doc, Analyzer analyzer)
        {
            DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
            dw.SetInfoStream(infoStream);
            System.String segmentName = NewSegmentName();
            dw.AddDocument(segmentName, doc);
            lock (this)
            {
                segmentInfos.Add(new SegmentInfo(segmentName, 1, ramDirectory));
                singleDocSegmentsCount++;
                MaybeMergeSegments();
            }
        }
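        
        // Example (sketch): index one document with a different analyzer than
        // the writer's default, e.g. for per-language analysis ("deAnalyzer"
        // is a placeholder):
        //
        //   writer.AddDocument(doc, deAnalyzer);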
        internal int GetSegmentsCounter()
        {
            return segmentInfos.counter;
        }
        
        private System.String NewSegmentName()
        {
            lock (this)
            {
                // segment names are the running counter in base 36: "_0", "_1", ... "_a", ...
                return "_" + SupportClass.Number.ToString(segmentInfos.counter++, SupportClass.Number.MAX_RADIX);
            }
        }
        /// <summary>Determines how often segment indices are merged by addDocument(). With
        /// smaller values, less RAM is used while indexing, and searches on
        /// unoptimized indices are faster, but indexing speed is slower. With larger
        /// values, more RAM is used during indexing, and while searches on unoptimized
        /// indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
        /// for batch index creation, and smaller values (&lt; 10) for indices that are
        /// interactively maintained.
        ///
        /// <p>This must never be less than 2. The default value is 10.</p>
        /// </summary>
        /// <deprecated> use {@link #SetMergeFactor} instead
        /// </deprecated>
        public int mergeFactor = DEFAULT_MERGE_FACTOR;
        /// <summary>Determines the minimal number of documents required before the buffered
        /// in-memory documents are merged and a new segment is created.
        /// Since documents are buffered in a {@link Lucene.Net.store.RAMDirectory},
        /// a large value gives faster indexing. At the same time, mergeFactor limits
        /// the number of files open in an FSDirectory.
        ///
        /// <p>The default value is 10.</p>
        /// </summary>
        /// <deprecated> use {@link #SetMaxBufferedDocs} instead
        /// </deprecated>
        public int minMergeDocs = DEFAULT_MIN_MERGE_DOCS;
        /// <summary>Determines the largest number of documents ever merged by addDocument().
        /// Small values (e.g., less than 10,000) are best for interactive indexing,
        /// as this limits the length of pauses while indexing to a few seconds.
        /// Larger values are best for batched indexing and speedier searches.
        ///
        /// <p>The default value is System.Int32.MaxValue.</p>
        /// </summary>
        /// <deprecated> use {@link #SetMaxMergeDocs} instead
        /// </deprecated>
        public int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
        
        /// <summary>If non-null, information about merges will be printed to this.</summary>
        /// <deprecated> use {@link #SetInfoStream} instead
        /// </deprecated>
        public System.IO.TextWriter infoStream = null;
        /// <summary>Merges all segments together into a single segment, optimizing an index
        /// for search.
        /// </summary>
        public virtual void Optimize()
        {
            lock (this)
            {
                FlushRamSegments();
                // keep merging until a single segment remains that lives in this
                // directory, has no deletions, and (if requested) uses the
                // compound format with no separate norms
                while (segmentInfos.Count > 1 ||
                       (segmentInfos.Count == 1 &&
                        (SegmentReader.HasDeletions(segmentInfos.Info(0)) ||
                         segmentInfos.Info(0).dir != directory ||
                         (useCompoundFile &&
                          (!SegmentReader.UsesCompoundFile(segmentInfos.Info(0)) ||
                           SegmentReader.HasSeparateNorms(segmentInfos.Info(0)))))))
                {
                    int minSegment = segmentInfos.Count - mergeFactor;
                    MergeSegments(minSegment < 0 ? 0 : minSegment);
                }
            }
        }
        /// <summary>Merges all segments from an array of indexes into this index.
        ///
        /// <p>This may be used to parallelize batch indexing. A large document
        /// collection can be broken into sub-collections. Each sub-collection can be
        /// indexed in parallel, on a different thread, process or machine. The
        /// complete index can then be created by merging sub-collection indexes
        /// with this method (see the sketch below).</p>
        ///
        /// <p>After this completes, the index is optimized.</p>
        /// </summary>
        public virtual void AddIndexes(Directory[] dirs)
        {
            lock (this)
            {
                Optimize(); // start with zero or 1 seg
                
                int start = segmentInfos.Count;
                
                for (int i = 0; i < dirs.Length; i++)
                {
                    SegmentInfos sis = new SegmentInfos(); // read infos from dir
                    sis.Read(dirs[i]);
                    for (int j = 0; j < sis.Count; j++)
                    {
                        segmentInfos.Add(sis.Info(j)); // add each info
                    }
                }
                
                // merge newly added segments in log(n) passes
                while (segmentInfos.Count > start + mergeFactor)
                {
                    for (int base_Renamed = start + 1; base_Renamed < segmentInfos.Count; base_Renamed++)
                    {
                        int end = System.Math.Min(segmentInfos.Count, base_Renamed + mergeFactor);
                        if (end - base_Renamed > 1)
                            MergeSegments(base_Renamed, end);
                    }
                }
                
                Optimize(); // final cleanup
            }
        }
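        
        // Example (sketch): fold two sub-indexes, built in parallel, into this
        // writer's index. The paths are placeholders.
        //
        //   Directory[] parts = new Directory[] {
        //       FSDirectory.GetDirectory("/tmp/part-a", false),
        //       FSDirectory.GetDirectory("/tmp/part-b", false)
        //   };
        //   writer.AddIndexes(parts);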
        /// <summary>Merges the provided indexes into this index.
        /// <p>After this completes, the index is optimized.</p>
        /// <p>The provided IndexReaders are not closed.</p>
        /// </summary>
        public virtual void AddIndexes(IndexReader[] readers)
        {
            lock (this)
            {
                Optimize(); // start with zero or 1 seg
                
                System.String mergedName = NewSegmentName();
                SegmentMerger merger = new SegmentMerger(this, mergedName);
                
                System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
                IndexReader sReader = null;
                if (segmentInfos.Count == 1)
                {
                    // add existing index, if any
                    sReader = SegmentReader.Get(segmentInfos.Info(0));
                    merger.Add(sReader);
                    segmentsToDelete.Add(sReader); // queue segment for deletion
                }
                
                for (int i = 0; i < readers.Length; i++)
                {
                    // add new indexes
                    merger.Add(readers[i]);
                }
                
                int docCount = merger.Merge(); // merge 'em
                
                segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new
                segmentInfos.Add(new SegmentInfo(mergedName, docCount, directory));
                
                if (sReader != null)
                    sReader.Close();
                
                lock (directory)
                {
                    // in- & inter-process sync
                    new AnonymousClassWith1(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
                }
                
                if (useCompoundFile)
                {
                    System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp");
                    lock (directory)
                    {
                        // in- & inter-process sync
                        new AnonymousClassWith2(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
                    }
                }
            }
        }
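        
        // Example (sketch): fold an already-open index into this one; the
        // reader stays open and must be closed by the caller. The path is a
        // placeholder.
        //
        //   IndexReader reader = IndexReader.Open("/tmp/other-index");
        //   writer.AddIndexes(new IndexReader[] { reader });
        //   reader.Close();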
        /// <summary>Merges all RAM-resident segments. </summary>
        private void FlushRamSegments()
        {
            int minSegment = segmentInfos.Count - 1;
            int docCount = 0;
            while (minSegment >= 0 && (segmentInfos.Info(minSegment)).dir == ramDirectory)
            {
                docCount += segmentInfos.Info(minSegment).docCount;
                minSegment--;
            }
            if (minSegment < 0 || (docCount + segmentInfos.Info(minSegment).docCount) > mergeFactor || !(segmentInfos.Info(segmentInfos.Count - 1).dir == ramDirectory))
                minSegment++;
            if (minSegment >= segmentInfos.Count)
                return; // none to merge
            MergeSegments(minSegment);
        }
        /// <summary>Incremental segment merger. </summary>
        private void MaybeMergeSegments()
        {
            long targetMergeDocs = minMergeDocs;
            while (targetMergeDocs <= maxMergeDocs)
            {
                // find segments smaller than current target size
                int minSegment = segmentInfos.Count - singleDocSegmentsCount; // top 1-doc segments are taken for sure
                int mergeDocs = singleDocSegmentsCount;
                while (--minSegment >= 0)
                {
                    SegmentInfo si = segmentInfos.Info(minSegment);
                    if (si.docCount >= targetMergeDocs)
                        break;
                    mergeDocs += si.docCount;
                }
                
                if (mergeDocs >= targetMergeDocs)
                {
                    // found a merge to do
                    MergeSegments(minSegment + 1);
                    singleDocSegmentsCount = 0;
                }
                else
                    break;
                
                targetMergeDocs *= mergeFactor; // increase target size
            }
        }
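        
        // Worked trace (defaults: mergeFactor = 10, minMergeDocs = 10): every
        // ten buffered single-document segments merge into one 10-doc segment;
        // once ten 10-doc segments accumulate, the target grows to 100 and they
        // merge into a 100-doc segment, and so on, giving log(N) cascading merges.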
        /// <summary>Pops segments off of the segmentInfos stack down to minSegment, merges them,
        /// and pushes the merged index onto the top of the segmentInfos stack.
        /// </summary>
        private void MergeSegments(int minSegment)
        {
            MergeSegments(minSegment, segmentInfos.Count);
        }
        
        /// <summary>Merges the named range of segments, replacing them in the stack with a
        /// single segment.
        /// </summary>
        private void MergeSegments(int minSegment, int end)
        {
            System.String mergedName = NewSegmentName();
            if (infoStream != null)
                infoStream.Write("merging segments");
            SegmentMerger merger = new SegmentMerger(this, mergedName);
            
            System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            for (int i = minSegment; i < end; i++)
            {
                SegmentInfo si = segmentInfos.Info(i);
                if (infoStream != null)
                    infoStream.Write(" " + si.name + " (" + si.docCount + " docs)");
                IndexReader reader = SegmentReader.Get(si);
                merger.Add(reader);
                if ((reader.Directory() == this.directory) || (reader.Directory() == this.ramDirectory))
                    segmentsToDelete.Add(reader); // queue segment for deletion
            }
            
            int mergedDocCount = merger.Merge();
            
            if (infoStream != null)
            {
                infoStream.WriteLine(" into " + mergedName + " (" + mergedDocCount + " docs)");
            }
            
            for (int i = end - 1; i >= minSegment; i--)
            {
                // remove old infos & add new
                segmentInfos.RemoveAt(i);
            }
            segmentInfos.Add(new SegmentInfo(mergedName, mergedDocCount, directory));
            
            // close readers before we attempt to delete now-obsolete segments
            merger.CloseReaders();
            
            lock (directory)
            {
                // in- & inter-process sync
                new AnonymousClassWith3(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
            }
            
            if (useCompoundFile)
            {
                System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp");
                lock (directory)
                {
                    // in- & inter-process sync
                    new AnonymousClassWith4(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run();
                }
            }
        }
        /*
         * Some operating systems (e.g. Windows) don't permit a file to be deleted
         * while it is opened for read (e.g. by another process or thread). So we
         * assume that when a delete fails it is because the file is open in another
         * process, and queue the file for subsequent deletion.
         */
        
        private void DeleteSegments(System.Collections.ArrayList segments)
        {
            System.Collections.ArrayList deletable = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            
            DeleteFiles(ReadDeleteableFiles(), deletable); // try to delete deleteable
            
            for (int i = 0; i < segments.Count; i++)
            {
                SegmentReader reader = (SegmentReader) segments[i];
                if (reader.Directory() == this.directory)
                    DeleteFiles(reader.Files(), deletable); // try to delete our files
                else
                    DeleteFiles(reader.Files(), reader.Directory()); // delete other files
            }
            
            WriteDeleteableFiles(deletable); // note files we can't delete
        }
        private void DeleteFiles(System.Collections.ArrayList files)
        {
            System.Collections.ArrayList deletable = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            DeleteFiles(ReadDeleteableFiles(), deletable); // try to delete deleteable
            DeleteFiles(files, deletable); // try to delete our files
            WriteDeleteableFiles(deletable); // note files we can't delete
        }
        
        private void DeleteFiles(System.Collections.ArrayList files, Directory directory)
        {
            for (int i = 0; i < files.Count; i++)
                directory.DeleteFile((System.String) files[i]);
        }
        
        private void DeleteFiles(System.Collections.ArrayList files, System.Collections.ArrayList deletable)
        {
            for (int i = 0; i < files.Count; i++)
            {
                System.String file = (System.String) files[i];
                try
                {
                    directory.DeleteFile(file); // try to delete each file
                }
                catch (System.IO.IOException e)
                {
                    // if delete fails
                    if (directory.FileExists(file))
                    {
                        if (infoStream != null)
                        {
                            infoStream.WriteLine(e.ToString() + "; Will re-try later.");
                        }
                        deletable.Add(file); // add to deletable
                    }
                }
            }
        }
        private System.Collections.ArrayList ReadDeleteableFiles()
        {
            System.Collections.ArrayList result = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            if (!directory.FileExists(IndexFileNames.DELETABLE))
                return result;
            
            IndexInput input = directory.OpenInput(IndexFileNames.DELETABLE);
            try
            {
                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read file names
                    result.Add(input.ReadString());
                }
            }
            finally
            {
                input.Close();
            }
            return result;
        }
        private void WriteDeleteableFiles(System.Collections.ArrayList files)
        {
            // write the list to a temporary file, then rename it over the old
            // one, so a crash can never leave a half-written deletable file
            IndexOutput output = directory.CreateOutput("deleteable.new");
            try
            {
                output.WriteInt(files.Count);
                for (int i = 0; i < files.Count; i++)
                    output.WriteString((System.String) files[i]);
            }
            finally
            {
                output.Close();
            }
            directory.RenameFile("deleteable.new", IndexFileNames.DELETABLE);
        }
    }
}