2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Directory
= Lucene
.Net
.Store
.Directory
;
18 using InputStream
= Lucene
.Net
.Store
.InputStream
;
19 using OutputStream
= Lucene
.Net
.Store
.OutputStream
;
20 namespace Lucene
.Net
.Index
24 /// <summary> Combines multiple files into a single compound file.
25 /// The file format:<br/>
27 /// <li>VInt fileCount</li>
29 /// fileCount entries with the following structure:</li>
31 /// <li>long dataOffset</li>
32 /// <li>UTFString extension</li>
35 /// fileCount entries with the raw data of the corresponding file</li>
38 /// The fileCount integer indicates how many files are contained in this compound
39 /// file. The {directory} that follows has that many entries. Each directory entry
40 /// contains an encoding identifier, a long pointer to the start of this file's
41 /// data section, and a UTF String with that file's extension.
44 /// <author> Dmitry Serebrennikov
46 /// <version> $Id: CompoundFileWriter.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $
48 sealed public class CompoundFileWriter
/// <summary>Bookkeeping record for one file placed in the compound file: the file's name
/// plus two output-stream positions recorded while the compound file is written. </summary>
51 private sealed class FileEntry
53 /// <summary>Name of the source file. It is opened through the directory when its data is copied and is written as the string part of this file's directory entry. </summary>
54 internal System
.String file
;
56 /// <summary>Temporary holder for the output position at which this file's directory entry starts. It is recorded just before a placeholder long is written, so the real data offset can be written there later. </summary>
57 internal long directoryOffset
;
59 /// <summary>Temporary holder for the output position at which this file's data section starts. It is written back into the directory entry once all data has been copied. </summary>
60 internal long dataOffset
;
// Directory in which the compound file is created (CreateFile) and from which
// the source files are opened (OpenFile).
64 private Directory directory
;
// Name passed to directory.CreateFile when the compound file is written.
65 private System
.String fileName
;
// NOTE(review): only the construction of this table is visible in this excerpt.
// Presumably it records the names already added so duplicates can be rejected
// (see the "already added" error in AddFile) - confirm.
66 private System
.Collections
.Hashtable ids
;
// List of FileEntry objects; elements are cast to FileEntry when enumerated,
// and its Count is written as the number of entries in the compound file.
67 private System
.Collections
.ArrayList entries
;
// Starts out false.
// NOTE(review): presumably set once the merge has run and tested by the guards
// behind the "Merge already performed" / "Can't add extensions after merge"
// errors; those checks are not visible in this excerpt - confirm.
68 private bool merged
= false;
71 /// <summary>Create the compound stream in the specified file. The file name is the
72 /// entire name (no extensions are added).
74 public CompoundFileWriter(Directory dir
, System
.String name
)
// Argument validation: a missing directory or a missing name is reported as a
// System.ArgumentException carrying the corresponding message.
// NOTE(review): the conditions guarding these two throws are not visible in
// this excerpt - presumably null checks on dir and name; confirm.
77 throw new System
.ArgumentException("Missing directory");
79 throw new System
.ArgumentException("Missing name");
// Start with an empty id table and an empty entry list.
83 ids
= new System
.Collections
.Hashtable();
84 entries
= new System
.Collections
.ArrayList();
87 /// <summary>Returns the directory of the compound file. </summary>
/// <returns>The Directory of the compound file. NOTE(review): the method body is not
/// visible in this excerpt; presumably this is the directory given to the constructor - confirm.</returns>
88 public Directory
GetDirectory()
93 /// <summary>Returns the name of the compound file. </summary>
/// <returns>The name of the compound file. NOTE(review): the method body is not
/// visible in this excerpt; presumably this is the name given to the constructor - confirm.</returns>
94 public System
.String
GetName()
99 /// <summary>Add a source file to the set of files that will be combined into the
100 /// compound stream. This signature takes only the file name; it has no separate
101 /// sourceDir or id argument, so the file name is the string by which the
102 /// sub-stream will be known in the compound stream. The caller must ensure that
103 /// the name is unique; a name reported as already added raises an ArgumentException.
105 public void AddFile(System
.String file
)
// Files cannot be added once the merge has been performed.
// NOTE(review): the condition guarding this throw is not visible in this excerpt.
108 throw new System
.SystemException("Can't add extensions after merge has been called");
// A missing file name is rejected.
// NOTE(review): guard not visible here - presumably a null check on file; confirm.
111 throw new System
.ArgumentException("Missing source file");
// Duplicate names are rejected; the message includes the offending file name.
119 throw new System
.ArgumentException("File " + file
+ " already added");
// Create the bookkeeping record for the newly added file.
122 FileEntry entry
= new FileEntry();
127 /// <summary>Merge files with the extensions added up to now.
128 /// All files with these extensions are combined sequentially into the
129 /// compound stream. After successful merge, the source files
// The merge can be performed only once.
// NOTE(review): the condition guarding this throw is not visible in this excerpt.
135 throw new System
.SystemException("Merge already performed");
// Nothing to write if no file has been added.
137 if ((entries
.Count
== 0))
138 throw new System
.SystemException("No entries to merge have been defined");
142 // Open the compound stream.
143 OutputStream os
= null;
146 os
= directory
.CreateFile(fileName
);
148 // Write the number of entries.
149 os
.WriteVInt(entries
.Count
);
151 // Pass 1: write the directory with all offsets at 0.
152 // Remember the positions of directory entries so that we can
153 // adjust the offsets later.
154 System
.Collections
.IEnumerator it
= entries
.GetEnumerator();
155 while (it
.MoveNext())
157 FileEntry fe
= (FileEntry
) it
.Current
;
158 fe
.directoryOffset
= os
.GetFilePointer();
159 os
.WriteLong(0); // placeholder for now; overwritten with the real data offset in pass 3
160 os
.WriteString(fe
.file
);
163 // Pass 2: open the files and copy their data into the stream.
164 // Remember the locations of each file's data section.
// One 1024-byte scratch buffer is allocated here and handed to every CopyFile call.
165 byte[] buffer
= new byte[1024];
166 it
= entries
.GetEnumerator();
167 while (it
.MoveNext())
169 FileEntry fe
= (FileEntry
) it
.Current
;
170 fe
.dataOffset
= os
.GetFilePointer();
171 CopyFile(fe
, os
, buffer
);
174 // Pass 3: write the data offsets into the directory of the compound stream.
// Each entry seeks back to the position saved in pass 1 and replaces the placeholder.
175 it
= entries
.GetEnumerator();
176 while (it
.MoveNext())
178 FileEntry fe
= (FileEntry
) it
.Current
;
179 os
.Seek(fe
.directoryOffset
);
180 os
.WriteLong(fe
.dataOffset
);
183 // Close the output stream. Set the os to null before trying to
184 // close so that if an exception occurs during the close, the
185 // finally clause below will not attempt to close the stream again.
187 OutputStream tmp
= os
;
// NOTE(review): the try block this handler belongs to and the handler body are
// not visible in this excerpt; the IOException is caught without being bound to a name.
198 catch (System
.IO
.IOException
)
204 /// <summary>Copy the contents of the file with specified extension into the
205 /// provided output stream. Use the provided buffer for moving data
206 /// to reduce memory allocation.
208 private void CopyFile(FileEntry source
, OutputStream os
, byte[] buffer
)
210 InputStream is_Renamed
= null;
213 long startPtr
= os
.GetFilePointer();
215 is_Renamed
= directory
.OpenFile(source
.file
);
216 long length
= is_Renamed
.Length();
217 long remainder
= length
;
218 int chunk
= buffer
.Length
;
220 while (remainder
> 0)
222 int len
= (int) System
.Math
.Min(chunk
, remainder
);
223 is_Renamed
.ReadBytes(buffer
, 0, len
);
224 os
.WriteBytes(buffer
, len
);
228 // Verify that remainder is 0
230 throw new System
.IO
.IOException("Non-zero remainder length after copying: " + remainder
+ " (id: " + source
.file
+ ", length: " + length
+ ", buffer size: " + chunk
+ ")");
232 // Verify that the output length diff is equal to original file
233 long endPtr
= os
.GetFilePointer();
234 long diff
= endPtr
- startPtr
;
236 throw new System
.IO
.IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length
);
240 if (is_Renamed
!= null)