First post!
[beagle.git] / Lucene.Net / Index / CompoundFileWriter.cs
blob39d7503528cee3360790ea6abd33c5fa26350a54
1 using System;
2 using System.Collections;
4 using Lucene.Net.Store;
6 namespace Lucene.Net.Index
8 /* ====================================================================
9 * The Apache Software License, Version 1.1
11 * Copyright (c) 2001 The Apache Software Foundation. All rights
12 * reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
24 * distribution.
26 * 3. The end-user documentation included with the redistribution,
27 * if any, must include the following acknowledgment:
28 * "This product includes software developed by the
29 * Apache Software Foundation (http://www.apache.org/)."
30 * Alternately, this acknowledgment may appear in the software itself,
31 * if and wherever such third-party acknowledgments normally appear.
33 * 4. The names "Apache" and "Apache Software Foundation" and
34 * "Apache Lucene" must not be used to endorse or promote products
35 * derived from this software without prior written permission. For
36 * written permission, please contact apache@apache.org.
38 * 5. Products derived from this software may not be called "Apache",
39 * "Apache Lucene", nor may "Apache" appear in their name, without
40 * prior written permission of the Apache Software Foundation.
42 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
43 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
45 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
46 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
49 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
50 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
51 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
52 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 * ====================================================================
56 * This software consists of voluntary contributions made by many
57 * individuals on behalf of the Apache Software Foundation. For more
58 * information on the Apache Software Foundation, please see
59 * <http://www.apache.org/>.
62 /// <summary>
63 /// Combines multiple files into a single compound file.
64 /// The file format:<br/>
65 /// <ul>
66 /// <li>VInt fileCount</li>
67 /// <li>{Directory}
68 /// fileCount entries with the following structure:</li>
69 /// <ul>
70 /// <li>long dataOffset</li>
71 /// <li>UTFString extension</li>
72 /// </ul>
73 /// <li>{File Data}
74 /// fileCount entries with the raw data of the corresponding file</li>
75 /// </ul>
76 ///
77 /// The fileCount integer indicates how many files are contained in this compound
78 /// file. The {directory} that follows has that many entries. Each directory entry
79 /// contains a long pointer to the start of this file's data section and a
80 /// UTF String with that file's extension.
81 ///
82 /// @author Dmitry Serebrennikov
83 /// @version $Id: CompoundFileWriter.cs,v 1.1.1.1 2004/04/29 22:53:51 trow Exp $
84 /// </summary>
85 public sealed class CompoundFileWriter
/// <summary>
/// Bookkeeping record for a single source file that is to be packed
/// into the compound file.
/// </summary>
private sealed class FileEntry
{
    /// <summary>
    /// Name of the source file.
    /// </summary>
    internal string file;

    /// <summary>
    /// Position in the compound stream at which the directory entry
    /// for this file starts (remembered so the entry can be patched later).
    /// </summary>
    internal long directoryOffset;

    /// <summary>
    /// Position in the compound stream at which this file's data starts.
    /// </summary>
    internal long dataOffset;
}
// Directory in which the compound file is created and from which the
// source files are opened.
private Directory directory;

// Full name of the compound file to create.
private string fileName;

// Names of the files added so far; used to reject duplicates.
private Hashtable ids;

// FileEntry objects, in the order the files were added.
private ArrayList entries;

// Becomes true once Close() has started the merge (defaults to false).
private bool merged;
/// <summary>
/// Sets up a writer that will build a compound file called
/// <paramref name="name"/> inside <paramref name="dir"/>. The name is
/// stored exactly as given; nothing is appended to it here.
/// </summary>
/// <param name="dir">directory that will hold the compound file; must not be null</param>
/// <param name="name">complete name of the compound file; must not be null</param>
/// <exception cref="ArgumentException">if either argument is null</exception>
public CompoundFileWriter(Directory dir, String name)
{
    if (null == dir)
    {
        throw new ArgumentException("Missing directory");
    }

    if (null == name)
    {
        throw new ArgumentException("Missing name");
    }

    // Start with empty bookkeeping collections.
    this.entries = new ArrayList();
    this.ids = new Hashtable();

    this.fileName = name;
    this.directory = dir;
}
/// <summary>
/// Gives access to the directory this writer was constructed with.
/// </summary>
/// <returns>the directory of the compound file</returns>
public Directory GetDirectory()
{
    return this.directory;
}
/// <summary>
/// Gives access to the compound file name this writer was constructed with.
/// </summary>
/// <returns>the name of the compound file</returns>
public String GetName()
{
    return this.fileName;
}
/// <summary>
/// Registers a source file to be included in the compound file.
/// The given name is both the name under which the file is opened when
/// the merge runs and the name written into the compound file's
/// directory. Each file may be added only once, and only before
/// <c>Close()</c> has been called.
/// </summary>
/// <param name="file">name of the source file; must not be null</param>
/// <exception cref="InvalidOperationException">
/// if the merge has already been started by <c>Close()</c>
/// </exception>
/// <exception cref="ArgumentException">
/// if <paramref name="file"/> is null or has already been added
/// </exception>
public void AddFile(String file)
{
    if (merged)
    {
        throw new InvalidOperationException(
            "Can't add extensions after merge has been called");
    }

    if (file == null)
    {
        throw new ArgumentException(
            "Missing source file");
    }

    // Test for a duplicate explicitly instead of catching whatever
    // Hashtable.Add throws: a blanket catch would also report unrelated
    // failures as "already added".
    if (ids.ContainsKey(file))
    {
        throw new ArgumentException(
            "File " + file + " already added");
    }

    ids.Add(file, file);

    FileEntry entry = new FileEntry();
    entry.file = file;
    entries.Add(entry);
}
/// <summary>
/// Performs the merge: creates the compound file and writes into it the
/// entry count, a directory with one entry per added file, and then the
/// contents of every added file in the order they were added. Once all
/// data has been copied, the directory entries are rewritten with the
/// real data offsets. This method itself does not remove the source files.
/// </summary>
/// <exception cref="InvalidOperationException">
/// if the merge was already performed or no files have been added
/// </exception>
public void Close()
{
    if (merged)
    {
        throw new InvalidOperationException(
            "Merge already performed");
    }

    if (entries.Count == 0)
    {
        throw new InvalidOperationException(
            "No entries to merge have been defined");
    }

    // Flag the writer as used before any I/O takes place.
    merged = true;

    OutputStream output = null;
    try
    {
        output = directory.CreateFile(fileName);

        // Header: how many files follow.
        output.WriteVInt(entries.Count);

        // First pass: emit the directory with placeholder offsets and
        // note where each entry sits so it can be patched afterwards.
        for (int i = 0; i < entries.Count; i++)
        {
            FileEntry entry = (FileEntry) entries[i];
            entry.directoryOffset = output.GetFilePointer();
            output.WriteLong(0); // placeholder, patched in the third pass
            output.WriteString(entry.file);
        }

        // Second pass: append each file's bytes, recording where its
        // data section begins. One scratch buffer is shared by all copies.
        byte[] scratch = new byte[1024];
        for (int i = 0; i < entries.Count; i++)
        {
            FileEntry entry = (FileEntry) entries[i];
            entry.dataOffset = output.GetFilePointer();
            CopyFile(entry, output, scratch);
        }

        // Third pass: go back and fill in the real data offsets.
        for (int i = 0; i < entries.Count; i++)
        {
            FileEntry entry = (FileEntry) entries[i];
            output.Seek(entry.directoryOffset);
            output.WriteLong(entry.dataOffset);
        }

        // Clear the shared reference before closing so that, should the
        // close itself throw, the finally block does not close it again.
        OutputStream finished = output;
        output = null;
        finished.Close();
    }
    finally
    {
        // Only reached with a non-null stream when something above failed;
        // closing is best effort so the original exception is not masked.
        if (output != null)
        {
            try
            {
                output.Close();
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
/// <summary>
/// Appends the full contents of one source file to the given output
/// stream, moving the data through the caller-supplied buffer so that
/// no buffer has to be allocated per file.
/// </summary>
/// <param name="source">entry naming the file to copy</param>
/// <param name="os">stream that receives the data at its current position</param>
/// <param name="buffer">scratch buffer; its length is the copy chunk size</param>
/// <exception cref="System.IO.IOException">
/// if the amount of data written does not match the source file's length
/// </exception>
private void CopyFile(FileEntry source, OutputStream os, byte[] buffer)
{
    InputStream input = null;
    try
    {
        // Remember where writing starts so the written size can be checked.
        long startPosition = os.GetFilePointer();

        input = directory.OpenFile(source.file);
        long totalLength = input.Length();
        int chunkSize = buffer.Length;

        long left = totalLength;
        while (left > 0)
        {
            // Never read more than what is left or what the buffer holds.
            int count = (int) Math.Min(chunkSize, left);
            input.ReadBytes(buffer, 0, count);
            os.WriteBytes(buffer, count);
            left -= count;
        }

        // Sanity check: everything must have been consumed.
        if (left != 0)
        {
            throw new System.IO.IOException(
                "Non-zero remainder length after copying: " + left
                + " (id: " + source.file + ", length: " + totalLength
                + ", buffer size: " + chunkSize + ")");
        }

        // Sanity check: the output must have advanced by exactly the
        // length of the source file.
        long written = os.GetFilePointer() - startPosition;
        if (written != totalLength)
        {
            throw new System.IO.IOException(
                "Difference in the output file offsets " + written
                + " does not match the original file length " + totalLength);
        }
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }
}