2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 using Directory
= Lucene
.Net
.Store
.Directory
;
19 using IndexInput
= Lucene
.Net
.Store
.IndexInput
;
20 using IndexOutput
= Lucene
.Net
.Store
.IndexOutput
;
22 namespace Lucene
.Net
.Index
/// <summary> Combines multiple files into a single compound file.
/// The file format:<br/>
/// <ul>
/// <li>VInt fileCount</li>
/// <li>{Directory}
/// fileCount entries with the following structure:
/// <ul>
/// <li>long dataOffset</li>
/// <li>String fileName</li>
/// </ul></li>
/// <li>{File Data}
/// fileCount entries with the raw data of the corresponding file</li>
/// </ul>
///
/// The fileCount integer indicates how many files are contained in this compound
/// file. The {directory} that follows has that many entries. Each directory entry
/// contains a long pointer to the start of this file's data section, and a String
/// with that file's name.
/// </summary>
/// <author> Dmitry Serebrennikov </author>
/// <version> $Id: CompoundFileWriter.cs,v 1.4 2006/10/02 17:08:52 joeshaw Exp $ </version>
50 public sealed class CompoundFileWriter
/// <summary>Bookkeeping record for one source file that goes into the compound file.</summary>
private sealed class FileEntry
{
    /// <summary>Name of the source file.</summary>
    internal string file;

    /// <summary>Position in the output where this file's directory entry starts
    /// (held temporarily while the compound file is being written).</summary>
    internal long directoryOffset;

    /// <summary>Position in the output where this file's data section starts
    /// (held temporarily while the compound file is being written).</summary>
    internal long dataOffset;
}
// Directory in which the compound file is created and from which the
// source files are opened.
private Directory directory;

// Name of the compound file to write.
private string fileName;

// NOTE(review): presumably the names of the files added so far, used to
// reject duplicates - confirm against AddFile.
private System.Collections.Hashtable ids;

// FileEntry records, in the order in which they are written out.
private System.Collections.ArrayList entries;

// Starts out false (the default for bool); NOTE(review): presumably set once
// the merge has been performed - confirm against the merge method.
private bool merged;
/// <summary>Creates a writer that builds the compound stream in the specified file.
/// The file name is used as given; no extension is appended.</summary>
/// <param name="dir">directory in which the compound file is created</param>
/// <param name="name">complete name of the compound file</param>
/// <exception cref="System.NullReferenceException">if <c>dir</c> or <c>name</c> is null</exception>
public CompoundFileWriter(Directory dir, System.String name)
{
    // NullReferenceException is the documented contract of this constructor,
    // so the exception type is kept for callers that catch it.
    if (dir == null)
    {
        throw new System.NullReferenceException("directory cannot be null");
    }
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }

    directory = dir;
    fileName = name;
    ids = new System.Collections.Hashtable();
    entries = new System.Collections.ArrayList();
}
/// <summary>Returns the directory of the compound file.</summary>
/// <returns>the directory this writer creates the compound file in</returns>
public Directory GetDirectory()
{
    return directory;
}
/// <summary>Returns the name of the compound file.</summary>
/// <returns>the file name this writer creates the compound file under</returns>
public System.String GetName()
{
    return fileName;
}
/// <summary>Adds a source stream. <c>file</c> is the string by which the
/// sub-stream will be known in the compound stream.</summary>
/// <param name="file">name of the source file to include</param>
/// <exception cref="System.SystemException">if the merge has already been performed</exception>
/// <exception cref="System.NullReferenceException">if <c>file</c> is null</exception>
/// <exception cref="System.ArgumentException">if a file with the same name has been added already</exception>
public void AddFile(System.String file)
{
    if (merged)
    {
        throw new System.SystemException("Can't add extensions after merge has been called");
    }
    if (file == null)
    {
        throw new System.NullReferenceException("file cannot be null");
    }

    // Check for a duplicate explicitly rather than relying on Hashtable.Add
    // to fail, so that only a genuine duplicate is reported as one.
    if (ids.ContainsKey(file))
    {
        throw new System.ArgumentException("File " + file + " already added");
    }
    ids.Add(file, file);

    FileEntry entry = new FileEntry();
    entry.file = file;
    entries.Add(entry);
}
/// <summary>Merges the files added up to now. All of them are combined
/// sequentially into the compound stream: first the entry count, then a
/// directory of (data offset, file name) pairs, then the raw file data.</summary>
/// <exception cref="System.SystemException">if the merge has already been performed,
/// or if no file has been added to this object</exception>
public void Close()
{
    if (merged)
    {
        throw new System.SystemException("Merge already performed");
    }
    if (entries.Count == 0)
    {
        throw new System.SystemException("No entries to merge have been defined");
    }

    merged = true;

    // open the compound stream
    IndexOutput os = null;
    try
    {
        os = directory.CreateOutput(fileName);

        // Write the number of entries
        os.WriteVInt(entries.Count);

        // Write the directory with all offsets at 0.
        // Remember the positions of directory entries so that we can
        // adjust the offsets later
        foreach (FileEntry fe in entries)
        {
            fe.directoryOffset = os.GetFilePointer();
            os.WriteLong(0); // placeholder, patched below
            os.WriteString(fe.file);
        }

        // Open the files and copy their data into the stream.
        // Remember the locations of each file's data section.
        byte[] buffer = new byte[1024];
        foreach (FileEntry fe in entries)
        {
            fe.dataOffset = os.GetFilePointer();
            CopyFile(fe, os, buffer);
        }

        // Write the data offsets into the directory of the compound stream
        foreach (FileEntry fe in entries)
        {
            os.Seek(fe.directoryOffset);
            os.WriteLong(fe.dataOffset);
        }

        // Close the output stream. Set the os to null before trying to
        // close so that if an exception occurs during the close, the
        // finally clause below will not attempt to close the stream
        // the second time.
        IndexOutput tmp = os;
        os = null;
        tmp.Close();
    }
    finally
    {
        if (os != null)
        {
            try
            {
                os.Close();
            }
            catch (System.IO.IOException)
            {
                // Deliberately ignored: we only get here when an earlier
                // exception is already propagating, and that one must win.
            }
        }
    }
}
213 /// <summary>Copy the contents of the file with specified extension into the
214 /// provided output stream. Use the provided buffer for moving data
215 /// to reduce memory allocation.
217 private void CopyFile(FileEntry source
, IndexOutput os
, byte[] buffer
)
219 IndexInput is_Renamed
= null;
222 long startPtr
= os
.GetFilePointer();
224 is_Renamed
= directory
.OpenInput(source
.file
);
225 long length
= is_Renamed
.Length();
226 long remainder
= length
;
227 int chunk
= buffer
.Length
;
229 while (remainder
> 0)
231 int len
= (int) System
.Math
.Min(chunk
, remainder
);
232 is_Renamed
.ReadBytes(buffer
, 0, len
);
233 os
.WriteBytes(buffer
, len
);
237 // Verify that remainder is 0
239 throw new System
.IO
.IOException("Non-zero remainder length after copying: " + remainder
+ " (id: " + source
.file
+ ", length: " + length
+ ", buffer size: " + chunk
+ ")");
241 // Verify that the output length diff is equal to original file
242 long endPtr
= os
.GetFilePointer();
243 long diff
= endPtr
- startPtr
;
245 throw new System
.IO
.IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length
);
249 if (is_Renamed
!= null)