2 using System
.Collections
;
4 using Lucene
.Net
.Store
;
6 namespace Lucene
.Net
.Index
8 /* ====================================================================
9 * The Apache Software License, Version 1.1
11 * Copyright (c) 2001 The Apache Software Foundation. All rights
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. The end-user documentation included with the redistribution,
27 * if any, must include the following acknowledgment:
28 * "This product includes software developed by the
29 * Apache Software Foundation (http://www.apache.org/)."
30 * Alternately, this acknowledgment may appear in the software itself,
31 * if and wherever such third-party acknowledgments normally appear.
33 * 4. The names "Apache" and "Apache Software Foundation" and
34 * "Apache Lucene" must not be used to endorse or promote products
35 * derived from this software without prior written permission. For
36 * written permission, please contact apache@apache.org.
38 * 5. Products derived from this software may not be called "Apache",
39 * "Apache Lucene", nor may "Apache" appear in their name, without
40 * prior written permission of the Apache Software Foundation.
42 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
43 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
45 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
46 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
49 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
50 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
51 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
52 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * ====================================================================
56 * This software consists of voluntary contributions made by many
57 * individuals on behalf of the Apache Software Foundation. For more
58 * information on the Apache Software Foundation, please see
59 * <http://www.apache.org/>.
63 /// Combines multiple files into a single compound file.
64 /// The file format:<br/>
66 /// <li>VInt fileCount</li>
68 /// fileCount entries with the following structure:</li>
70 /// <li>long dataOffset</li>
71 /// <li>UTFString extension</li>
74 /// fileCount entries with the raw data of the corresponding file</li>
77 /// The fileCount integer indicates how many files are contained in this compound
78 /// file. The {directory} that follows has that many entries. Each directory entry
79 /// contains an encoding identifier, a long pointer to the start of this file's
80 /// data section, and a UTF String with that file's extension.
82 /// @author Dmitry Serebrennikov
83 /// @version $Id: CompoundFileWriter.cs,v 1.1.1.1 2004/04/29 22:53:51 trow Exp $
85 public sealed class CompoundFileWriter
88 private sealed class FileEntry
96 /// temporary holder for the start of directory entry for this file
98 internal long directoryOffset
;
101 /// temporary holder for the start of this file's data section
103 internal long dataOffset
;
107 private Directory directory
;
108 private String fileName
;
109 private Hashtable ids
;
110 private ArrayList entries
;
111 private bool merged
= false;
115 /// Create the compound stream in the specified file. The file name is the
116 /// entire name (no extensions are added).
118 public CompoundFileWriter(Directory dir
, String name
)
121 throw new ArgumentException("Missing directory");
123 throw new ArgumentException("Missing name");
127 ids
= new Hashtable();
128 entries
= new ArrayList();
132 /// Returns the directory of the compound file.
134 public Directory
GetDirectory()
140 /// Returns the name of the compound file.
142 public String
GetName()
148 /// Add a source stream. If sourceDir is null, it is set to the
149 /// same value as the directory where this compound stream exists.
150 /// The id is the string by which the sub-stream will be known in the
151 /// compound stream. The caller must ensure that the ID is unique. If the
152 /// id is null, it is set to the name of the source file.
154 public void AddFile(String file
)
157 throw new InvalidOperationException(
158 "Can't add extensions after merge has been called");
161 throw new ArgumentException(
162 "Missing source file");
170 throw new ArgumentException(
171 "File " + file
+ " already added", e
);
174 FileEntry entry
= new FileEntry();
180 /// Merge files with the extensions added up to now.
181 /// All files with these extensions are combined sequentially into the
182 /// compound stream. After successful merge, the source files
188 throw new InvalidOperationException(
189 "Merge already performed");
191 if (entries
.Count
== 0)
192 throw new InvalidOperationException(
193 "No entries to merge have been defined");
197 // open the compound stream
198 OutputStream os
= null;
201 os
= directory
.CreateFile(fileName
);
203 // Write the number of entries
204 os
.WriteVInt(entries
.Count
);
206 // Write the directory with all offsets at 0.
207 // Remember the positions of directory entries so that we can
208 // adjust the offsets later
209 foreach(FileEntry fe
in entries
)
211 fe
.directoryOffset
= os
.GetFilePointer();
212 os
.WriteLong(0); // for now
213 os
.WriteString(fe
.file
);
216 // Open the files and copy their data into the stream.
217 // Remember the locations of each file's data section.
218 byte[] buffer
= new byte[1024];
220 foreach(FileEntry fe
in entries
)
222 fe
.dataOffset
= os
.GetFilePointer();
223 CopyFile(fe
, os
, buffer
);
226 // Write the data offsets into the directory of the compound stream
227 foreach(FileEntry fe
in entries
)
229 os
.Seek(fe
.directoryOffset
);
230 os
.WriteLong(fe
.dataOffset
);
233 // Close the output stream. Set the os to null before trying to
234 // close so that if an exception occurs during the close, the
235 // finally clause below will not attempt to close the stream
237 OutputStream tmp
= os
;
243 if (os
!= null) try { os.Close(); }
244 catch (System
.IO
.IOException
) { }
249 /// Copy the contents of the file with specified extension into the
250 /// provided output stream. Use the provided buffer for moving data
251 /// to reduce memory allocation.
253 private void CopyFile(FileEntry source
, OutputStream os
, byte[] buffer
)
255 InputStream stream
= null;
258 long startPtr
= os
.GetFilePointer();
260 stream
= directory
.OpenFile(source
.file
);
261 long length
= stream
.Length();
262 long remainder
= length
;
263 int chunk
= buffer
.Length
;
267 int len
= (int) Math
.Min(chunk
, remainder
);
268 stream
.ReadBytes(buffer
, 0, len
);
269 os
.WriteBytes(buffer
, len
);
273 // Verify that remainder is 0
275 throw new System
.IO
.IOException(
276 "Non-zero remainder length after copying: " + remainder
277 + " (id: " + source
.file
+ ", length: " + length
278 + ", buffer size: " + chunk
+ ")");
280 // Verify that the output length diff is equal to original file
281 long endPtr
= os
.GetFilePointer();
282 long diff
= endPtr
- startPtr
;
284 throw new System
.IO
.IOException(
285 "Difference in the output file offsets " + diff
286 + " does not match the original file length " + length
);
290 if (stream
!= null) stream
.Close();