2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Directory
= Lucene
.Net
.Store
.Directory
;
18 using IndexInput
= Lucene
.Net
.Store
.IndexInput
;
19 using IndexOutput
= Lucene
.Net
.Store
.IndexOutput
;
20 namespace Lucene
.Net
.Index
24 /// <summary> Combines multiple files into a single compound file.
25 /// The file format:<br>
27 /// <li>VInt fileCount</li>
29 /// fileCount entries with the following structure:</li>
31 /// <li>long dataOffset</li>
32 /// <li>UTFString extension</li>
35 /// fileCount entries with the raw data of the corresponding file</li>
38 /// The fileCount integer indicates how many files are contained in this compound
39 /// file. The {directory} that follows has that many entries. Each directory entry
40 /// contains a long pointer to the start of this file's data section, and a UTF
41 /// String with that file's name.
44 /// <author> Dmitry Serebrennikov
46 /// <version> $Id: CompoundFileWriter.cs,v 1.3 2005/10/06 19:29:55 dsd Exp $
48 public sealed class CompoundFileWriter
// Bookkeeping record for one file that is packed into the compound file.
51 private sealed class FileEntry
53 /// <summary>Name of the source file; it is opened through the directory when its data is copied and is also written into the compound file's directory section. </summary>
54 internal System
.String file
;
56 /// <summary>Output position at which this file's directory entry starts; recorded so the placeholder offset written there can be overwritten later. </summary>
57 internal long directoryOffset
;
59 /// <summary>Output position at which this file's data section starts; this value is written back into the directory entry. </summary>
60 internal long dataOffset
;
// Instance state of the writer.
// Directory object used to create the compound output and to open the source files.
64 private Directory directory
;
// Name passed to directory.CreateOutput when the compound file is written.
65 private System
.String fileName
;
// Created empty in the constructor. Presumably holds the names already added so that
// duplicates can be rejected -- its use is not visible in this extract; confirm.
66 private System
.Collections
.Hashtable ids
;
// List of FileEntry objects; it is enumerated three times while the compound file is
// written (directory entries, file data, offset fix-up).
67 private System
.Collections
.ArrayList entries
;
// Starts out false. Presumably set once the merge has run -- the assignment is not
// visible in this extract; confirm.
68 private bool merged
= false;
71 /// <summary>Create the compound stream in the specified file. The file name is the
72 /// entire name (no extensions are added).
/// The constructor also creates the empty <code>ids</code> table and <code>entries</code> list.
/// </summary>
/// <param name="dir">directory for the compound file; must not be null</param>
/// <param name="name">complete name of the compound file; must not be null</param>
74 /// <throws> System.NullReferenceException if <code>dir</code> or <code>name</code> is null </throws>
75 public CompoundFileWriter(Directory dir
, System
.String name
)
// NOTE(review): the guard conditions for the two throws below, and the statements
// that store dir/name, are not visible in this extract.
78 throw new System
.NullReferenceException("directory cannot be null");
80 throw new System
.NullReferenceException("name cannot be null");
// Start with no registered files.
84 ids
= new System
.Collections
.Hashtable();
85 entries
= new System
.Collections
.ArrayList();
88 /// <summary>Returns the directory of the compound file. </summary>
/// <returns>the Directory of the compound file</returns>
// NOTE(review): the method body is not visible in this extract.
89 public Directory
GetDirectory()
94 /// <summary>Returns the name of the compound file. </summary>
/// <returns>the name of the compound file</returns>
// NOTE(review): the method body is not visible in this extract.
95 public System
.String
GetName()
100 /// <summary>Add a source stream. <code>file</code> is the string by which the
101 /// sub-stream will be known in the compound stream.
/// </summary>
/// <param name="file">name of the file to add; must not be null</param>
104 /// <throws> System.SystemException if this writer is closed (the merge has already been called) </throws>
105 /// <throws> System.NullReferenceException if <code>file</code> is null </throws>
106 /// <throws> System.ArgumentException if a file with the same name
107 /// has been added already </throws>
109 public void AddFile(System
.String file
)
// NOTE(review): the guard conditions for the three throws below are not visible in
// this extract; the messages indicate what each one rejects.
112 throw new System
.SystemException("Can't add extensions after merge has been called");
115 throw new System
.NullReferenceException("file cannot be null");
123 throw new System
.ArgumentException("File " + file
+ " already added");
// Create the bookkeeping record for the new file.
// NOTE(review): the statements that fill it in and store it are not visible in this extract.
126 FileEntry entry
= new FileEntry();
131 /// <summary>Merge files with the extensions added up to now.
132 /// All files with these extensions are combined sequentially into the
133 /// compound stream. After successful merge, the source files ... (the remainder of this sentence is not present in this copy of the file).
/// The output is written as: the entry count (VInt), one directory entry per file
/// (a long offset written as 0 at first, then the file name string), then the data of
/// each file; finally the writer seeks back and overwrites each offset with the real
/// start position of that file's data.
/// </summary>
136 /// <throws> System.SystemException if close() had been called before or
137 /// if no file has been added to this object </throws>
// NOTE(review): the method signature and the guard condition for the first throw are
// not visible in this extract.
142 throw new System
.SystemException("Merge already performed");
// Nothing to write when no file was registered.
144 if ((entries
.Count
== 0))
145 throw new System
.SystemException("No entries to merge have been defined");
149 // open the compound stream
150 IndexOutput os
= null;
153 os
= directory
.CreateOutput(fileName
);
155 // Write the number of entries
156 os
.WriteVInt(entries
.Count
);
158 // Write the directory with all offsets at 0.
159 // Remember the positions of directory entries so that we can
160 // adjust the offsets later
161 System
.Collections
.IEnumerator it
= entries
.GetEnumerator();
162 while (it
.MoveNext())
164 FileEntry fe
= (FileEntry
) it
.Current
;
// Position of this entry's offset field, captured before the placeholder is written.
165 fe
.directoryOffset
= os
.GetFilePointer();
166 os
.WriteLong(0); // for now
167 os
.WriteString(fe
.file
);
170 // Open the files and copy their data into the stream.
171 // Remember the locations of each file's data section.
// One 1024-byte scratch buffer is shared by every CopyFile call in this loop.
172 byte[] buffer
= new byte[1024];
173 it
= entries
.GetEnumerator();
174 while (it
.MoveNext())
176 FileEntry fe
= (FileEntry
) it
.Current
;
// Output position where this file's data begins.
177 fe
.dataOffset
= os
.GetFilePointer();
178 CopyFile(fe
, os
, buffer
);
181 // Write the data offsets into the directory of the compound stream
182 it
= entries
.GetEnumerator();
183 while (it
.MoveNext())
185 FileEntry fe
= (FileEntry
) it
.Current
;
// Jump back to the placeholder written earlier and overwrite it with the real offset.
186 os
.Seek(fe
.directoryOffset
);
187 os
.WriteLong(fe
.dataOffset
);
190 // Close the output stream. Set the os to null before trying to
191 // close so that if an exception occurs during the close, the
192 // finally clause below will not attempt to close the stream
// NOTE(review): only the copy into tmp is visible here; the statements that null out
// os, close tmp, and the finally clause itself are not present in this extract.
194 IndexOutput tmp
= os
;
// NOTE(review): the body of this handler is not visible in this extract.
205 catch (System
.IO
.IOException
)
211 /// <summary>Copy the contents of the file with specified extension into the
212 /// provided output stream. Use the provided buffer for moving data
213 /// to reduce memory allocation.
215 private void CopyFile(FileEntry source
, IndexOutput os
, byte[] buffer
)
217 IndexInput is_Renamed
= null;
220 long startPtr
= os
.GetFilePointer();
222 is_Renamed
= directory
.OpenInput(source
.file
);
223 long length
= is_Renamed
.Length();
224 long remainder
= length
;
225 int chunk
= buffer
.Length
;
227 while (remainder
> 0)
229 int len
= (int) System
.Math
.Min(chunk
, remainder
);
230 is_Renamed
.ReadBytes(buffer
, 0, len
);
231 os
.WriteBytes(buffer
, len
);
235 // Verify that remainder is 0
237 throw new System
.IO
.IOException("Non-zero remainder length after copying: " + remainder
+ " (id: " + source
.file
+ ", length: " + length
+ ", buffer size: " + chunk
+ ")");
239 // Verify that the output length diff is equal to original file
240 long endPtr
= os
.GetFilePointer();
241 long diff
= endPtr
- startPtr
;
243 throw new System
.IO
.IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length
);
247 if (is_Renamed
!= null)