cvsimport
[beagle.git] / beagled / Lucene.Net / Index / CompoundFileWriter.cs
blobb768488accea35b3f06a75dde9687d2f2fb1de14
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using System;
18 using Directory = Lucene.Net.Store.Directory;
19 using IndexInput = Lucene.Net.Store.IndexInput;
20 using IndexOutput = Lucene.Net.Store.IndexOutput;
namespace Lucene.Net.Index
{
    /// <summary>
    /// Combines multiple files into a single compound file.
    /// <para>The file format:</para>
    /// <list type="bullet">
    /// <item><description>VInt fileCount</description></item>
    /// <item><description>{Directory}: fileCount entries, each consisting of a
    /// <c>long dataOffset</c> followed by a <c>String fileName</c></description></item>
    /// <item><description>{File Data}: fileCount entries with the raw data of the
    /// corresponding file</description></item>
    /// </list>
    /// <para>
    /// The fileCount integer indicates how many files are contained in this compound
    /// file. The {directory} that follows has that many entries. Each directory entry
    /// contains a long pointer to the start of this file's data section, and a String
    /// with that file's name.
    /// </para>
    /// </summary>
    /// <remarks>
    /// Typical use: call <see cref="AddFile"/> once per source file, then call
    /// <see cref="Close"/> exactly once to write the compound file.
    /// </remarks>
    /// <author> Dmitry Serebrennikov
    /// </author>
    /// <version> $Id: CompoundFileWriter.cs,v 1.4 2006/10/02 17:08:52 joeshaw Exp $
    /// </version>
    public sealed class CompoundFileWriter
    {
        /// <summary>Bookkeeping record for one file that was added to the writer.</summary>
        private sealed class FileEntry
        {
            /// <summary>source file </summary>
            internal System.String file;

            /// <summary>temporary holder for the start of directory entry for this file </summary>
            internal long directoryOffset;

            /// <summary>temporary holder for the start of this file's data section </summary>
            internal long dataOffset;
        }

        // Directory that holds both the source files and the compound file being written.
        private readonly Directory directory;

        // Name of the compound file, exactly as passed to the constructor.
        private readonly System.String fileName;

        // Names already passed to AddFile; consulted only to reject duplicates.
        private readonly System.Collections.Hashtable ids;

        // FileEntry objects in the order they were added, which is the order they are written.
        private readonly System.Collections.ArrayList entries;

        // Becomes true as soon as Close() starts merging; after that AddFile and Close both throw.
        private bool merged = false;

        /// <summary>Create the compound stream in the specified file. The file name is the
        /// entire name (no extensions are added).
        /// </summary>
        /// <param name="dir">directory in which the compound file is created and from
        /// which the added files are read</param>
        /// <param name="name">full name of the compound file</param>
        /// <exception cref="System.NullReferenceException">if <c>dir</c> or <c>name</c> is null</exception>
        public CompoundFileWriter(Directory dir, System.String name)
        {
            // NullReferenceException (not ArgumentNullException) is kept deliberately so the
            // exception type seen by existing callers does not change.
            if (dir == null)
            {
                throw new System.NullReferenceException("directory cannot be null");
            }
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }

            directory = dir;
            fileName = name;
            ids = new System.Collections.Hashtable();
            entries = new System.Collections.ArrayList();
        }

        /// <summary>Returns the directory of the compound file. </summary>
        public Directory GetDirectory()
        {
            return directory;
        }

        /// <summary>Returns the name of the compound file. </summary>
        public System.String GetName()
        {
            return fileName;
        }

        /// <summary>Add a source stream. <c>file</c> is the string by which the
        /// sub-stream will be known in the compound stream.
        /// </summary>
        /// <param name="file">name of the file to include in the compound file</param>
        /// <exception cref="System.SystemException">if <see cref="Close"/> has already been called</exception>
        /// <exception cref="System.NullReferenceException">if <c>file</c> is null</exception>
        /// <exception cref="System.ArgumentException">if a file with the same name has been added already</exception>
        public void AddFile(System.String file)
        {
            if (merged)
            {
                throw new System.SystemException("Can't add extensions after merge has been called");
            }
            if (file == null)
            {
                throw new System.NullReferenceException("file cannot be null");
            }

            // Test for a duplicate explicitly instead of catching whatever Hashtable.Add
            // throws: a catch-all would report unrelated failures as "already added".
            if (ids.ContainsKey(file))
            {
                throw new System.ArgumentException("File " + file + " already added");
            }
            ids.Add(file, file);

            FileEntry entry = new FileEntry();
            entry.file = file;
            entries.Add(entry);
        }

        /// <summary>Merge the files added up to now.
        /// All added files are combined sequentially, in the order they were added,
        /// into the compound stream. This method does not delete the source files.
        /// </summary>
        /// <exception cref="System.SystemException">if <c>Close()</c> had been called before or
        /// if no file has been added to this object</exception>
        public void Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }
            if (entries.Count == 0)
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            // Set before any I/O: even if the merge below fails, the writer stays closed.
            merged = true;

            // open the compound stream
            IndexOutput os = null;
            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                foreach (FileEntry fe in entries)
                {
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0); // placeholder, patched in the last pass below
                    os.WriteString(fe.file);
                }

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[1024];
                foreach (FileEntry fe in entries)
                {
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                foreach (FileEntry fe in entries)
                {
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                // os is only still non-null here when something above threw.
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Deliberately ignored: a failure while cleaning up must not
                        // replace the exception that is already propagating.
                    }
                }
            }
        }

        /// <summary>Copy the contents of the file named by <c>source.file</c> into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        /// <param name="source">entry whose <c>file</c> names the input to copy</param>
        /// <param name="os">output stream the data is appended to</param>
        /// <param name="buffer">scratch buffer; its length is the copy chunk size</param>
        /// <exception cref="System.IO.IOException">if the number of bytes written, as reported
        /// by the output file pointer, differs from the input length</exception>
        private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput input = null;
            try
            {
                long startPtr = os.GetFilePointer();

                input = directory.OpenInput(source.file);
                long length = input.Length();
                long remainder = length;
                int chunk = buffer.Length;

                while (remainder > 0)
                {
                    // Never request more than what is left of the input.
                    int len = (int) System.Math.Min(chunk, remainder);
                    input.ReadBytes(buffer, 0, len);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }
        }
    }
}