Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Index / CompoundFileWriter.cs
blobdbd077a011ea3f0b3c5288f351e9f67601aee13a
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using Directory = Lucene.Net.Store.Directory;
18 using InputStream = Lucene.Net.Store.InputStream;
19 using OutputStream = Lucene.Net.Store.OutputStream;
namespace Lucene.Net.Index
{
    /// <summary> Combines multiple files into a single compound file.
    /// The file format:<br/>
    /// <ul>
    /// <li>VInt fileCount</li>
    /// <li>{Directory}
    /// fileCount entries with the following structure:
    /// <ul>
    /// <li>long dataOffset</li>
    /// <li>String fileName</li>
    /// </ul>
    /// </li>
    /// <li>{File Data}
    /// fileCount entries with the raw data of the corresponding file</li>
    /// </ul>
    ///
    /// The fileCount integer indicates how many files are contained in this compound
    /// file. The {directory} that follows has that many entries. Each directory entry
    /// contains a long pointer to the start of this file's data section, and a string
    /// holding the name under which the file was added (see <see cref="AddFile"/>).
    ///
    /// </summary>
    /// <author> Dmitry Serebrennikov
    /// </author>
    /// <version> $Id: CompoundFileWriter.cs,v 1.2 2005/01/17 19:54:29 joeshaw Exp $
    /// </version>
    public sealed class CompoundFileWriter
    {
        /// <summary>Size, in bytes, of the scratch buffer used to copy file data. </summary>
        private const int CopyBufferSize = 1024;

        /// <summary>Bookkeeping record for one file that will be merged. </summary>
        private sealed class FileEntry
        {
            /// <summary>source file </summary>
            internal System.String file;

            /// <summary>temporary holder for the start of directory entry for this file </summary>
            internal long directoryOffset;

            /// <summary>temporary holder for the start of this file's data section </summary>
            internal long dataOffset;
        }

        private readonly Directory directory;
        private readonly System.String fileName;

        // Names added so far; used only to reject duplicates in AddFile.
        private readonly System.Collections.Hashtable ids;

        // FileEntry objects in the order they were added; this is the order
        // in which they are written to the compound file.
        private readonly System.Collections.ArrayList entries;

        // Set once Close() has started; blocks further AddFile()/Close() calls.
        private bool merged = false;

        /// <summary>Create the compound stream in the specified file. The file name is the
        /// entire name (no extensions are added).
        /// </summary>
        /// <param name="dir">Directory in which the compound file is created and from which
        /// the source files are read.</param>
        /// <param name="name">Full name of the compound file.</param>
        /// <exception cref="System.ArgumentException">If <paramref name="dir"/> or
        /// <paramref name="name"/> is null.</exception>
        public CompoundFileWriter(Directory dir, System.String name)
        {
            if (dir == null)
            {
                throw new System.ArgumentException("Missing directory");
            }
            if (name == null)
            {
                throw new System.ArgumentException("Missing name");
            }

            directory = dir;
            fileName = name;
            ids = new System.Collections.Hashtable();
            entries = new System.Collections.ArrayList();
        }

        /// <summary>Returns the directory of the compound file. </summary>
        public Directory GetDirectory()
        {
            return directory;
        }

        /// <summary>Returns the name of the compound file. </summary>
        public System.String GetName()
        {
            return fileName;
        }

        /// <summary>Register a source file to be included in the compound file.
        /// The file is read from the same directory in which the compound file
        /// is created, and its name is the key under which it is recorded in
        /// the compound file's directory. Each name may be added only once.
        /// </summary>
        /// <param name="file">Name of the source file.</param>
        /// <exception cref="System.SystemException">If <see cref="Close"/> has already
        /// been called.</exception>
        /// <exception cref="System.ArgumentException">If <paramref name="file"/> is null
        /// or has already been added.</exception>
        public void AddFile(System.String file)
        {
            if (merged)
            {
                throw new System.SystemException("Can't add extensions after merge has been called");
            }

            if (file == null)
            {
                throw new System.ArgumentException("Missing source file");
            }

            // Check for duplicates explicitly instead of catching whatever
            // Hashtable.Add happens to throw: only a genuine duplicate should
            // be reported as "already added".
            if (ids.ContainsKey(file))
            {
                throw new System.ArgumentException("File " + file + " already added");
            }
            ids.Add(file, file);

            FileEntry entry = new FileEntry();
            entry.file = file;
            entries.Add(entry);
        }

        /// <summary>Merge the files added so far. All added files are copied
        /// sequentially, in the order they were added, into the compound
        /// stream. This method does not delete the source files.
        /// </summary>
        /// <exception cref="System.SystemException">If the merge was already performed
        /// or no files have been added.</exception>
        public void Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if (entries.Count == 0)
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            OutputStream os = null;
            try
            {
                os = directory.CreateFile(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                foreach (FileEntry fe in entries)
                {
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0); // for now
                    os.WriteString(fe.file);
                }

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[CopyBufferSize];
                foreach (FileEntry fe in entries)
                {
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                foreach (FileEntry fe in entries)
                {
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                OutputStream tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Deliberately ignored: we only get here when the merge
                        // itself already failed, and that original exception is
                        // the one the caller needs to see.
                    }
                }
            }
        }

        /// <summary>Copy the contents of the given source file into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        /// <param name="source">Entry naming the file to copy.</param>
        /// <param name="os">Destination stream, positioned where the data should go.</param>
        /// <param name="buffer">Scratch buffer; its length is the copy chunk size.</param>
        /// <exception cref="System.IO.IOException">If the number of bytes written does not
        /// match the length of the source file.</exception>
        private void CopyFile(FileEntry source, OutputStream os, byte[] buffer)
        {
            InputStream input = null;
            try
            {
                long startPtr = os.GetFilePointer();

                input = directory.OpenFile(source.file);
                long length = input.Length();
                long remainder = length;
                int chunk = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int) System.Math.Min(chunk, remainder);
                    input.ReadBytes(buffer, 0, len);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }
        }
    }
}