Initial commit. FBReader 0.8.12
[lbook_fbreader.git] / fbreader / src / formats / chm / CHMFile.cpp
blob3a894df01c3cb54a5ebe4c747124112c17776b26
1 /*
2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 #include <string.h>
22 #include <ZLFile.h>
23 #include <ZLStringUtil.h>
24 #include <ZLUnicodeUtil.h>
25 #include <ZLInputStream.h>
27 #include "CHMFile.h"
28 #include "CHMReferenceCollection.h"
30 #include "LZXDecompressor.h"
32 static std::string readString(ZLInputStream &stream, size_t length) {
33 std::string string(length, ' ');
34 stream.read(const_cast<char*>(string.data()), length);
35 return string;
38 static unsigned short readUnsignedWord(ZLInputStream &stream) {
39 unsigned char buffer[2];
40 stream.read((char*)buffer, 2);
41 unsigned short result = buffer[1];
42 result = result << 8;
43 result += buffer[0];
44 return result;
47 static unsigned long readUnsignedDWord(ZLInputStream &stream) {
48 unsigned long lowPart = readUnsignedWord(stream);
49 unsigned long highPart = readUnsignedWord(stream);
50 return (highPart << 16) + lowPart;
53 static unsigned long long readUnsignedQWord(ZLInputStream &stream) {
54 unsigned long long lowPart = readUnsignedDWord(stream);
55 unsigned long long highPart = readUnsignedDWord(stream);
56 return (highPart << 32) + lowPart;
59 static unsigned long long readEncodedInteger(ZLInputStream &stream) {
60 unsigned long long result = 0;
61 char part;
62 do {
63 result = result << 7;
64 stream.read(&part, 1);
65 result += part & 0x7F;
66 } while (part & -0x80);
67 return result;
70 CHMInputStream::CHMInputStream(shared_ptr<ZLInputStream> base, const CHMFileInfo::SectionInfo &sectionInfo, size_t offset, size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) {
71 myBaseStartIndex = offset / 0x8000;
72 myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval;
73 myBytesToSkip = offset - myBaseStartIndex * 0x8000;
74 myOutData = new unsigned char[0x8000];
77 CHMInputStream::~CHMInputStream() {
78 close();
79 delete[] myOutData;
82 bool CHMInputStream::open() {
83 myOffset = 0;
84 myDoSkip = true;
85 myBaseIndex = myBaseStartIndex;
86 if (myDecompressor.isNull()) {
87 myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex);
88 } else {
89 myDecompressor->reset();
91 myOutDataOffset = 0;
92 myOutDataLength = 0;
93 return true;
96 size_t CHMInputStream::read(char *buffer, size_t maxSize) {
97 if (myDoSkip) {
98 do_read(0, myBytesToSkip);
99 myDoSkip = false;
101 size_t realSize = do_read(buffer, std::min(maxSize, mySize - myOffset));
102 myOffset += realSize;
103 return realSize;
106 size_t CHMInputStream::do_read(char *buffer, size_t maxSize) {
107 size_t realSize = 0;
108 do {
109 if (myOutDataLength == 0) {
110 if (myBaseIndex >= mySectionInfo.ResetTable.size()) {
111 break;
113 const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size();
114 const size_t start = mySectionInfo.ResetTable[myBaseIndex];
115 const size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1];
116 myOutDataLength = isTail ? mySectionInfo.UncompressedSize % 0x8000 : 0x8000;
117 myOutDataOffset = 0;
119 myInData.erase();
120 myInData.append(end - start, '\0');
121 myBase->seek(mySectionInfo.Offset + start, true);
122 myBase->read((char*)myInData.data(), myInData.length());
123 if (myBaseIndex % mySectionInfo.ResetInterval == 0) {
124 myDecompressor->reset();
126 ++myBaseIndex;
128 if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) {
129 break;
132 const size_t partSize = std::min(myOutDataLength, maxSize);
133 if (buffer != 0) {
134 memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize);
136 maxSize -= partSize;
137 realSize += partSize;
138 myOutDataLength -= partSize;
139 myOutDataOffset += partSize;
140 } while (maxSize != 0);
141 return realSize;
144 void CHMInputStream::close() {
145 myDecompressor = 0;
148 void CHMInputStream::seek(int offset, bool absoluteOffset) {
149 if (absoluteOffset) {
150 offset -= myOffset;
152 if (offset > 0) {
153 read(0, offset);
154 } else if (offset < 0) {
155 open();
156 read(0, std::max(offset + (int)myOffset, 0));
160 size_t CHMInputStream::offset() const {
161 return myOffset;
164 size_t CHMInputStream::sizeOfOpened() {
165 return mySize;
168 shared_ptr<ZLInputStream> CHMFileInfo::entryStream(shared_ptr<ZLInputStream> base, const std::string &name) const {
169 RecordMap::const_iterator it = myRecords.find(ZLUnicodeUtil::toLower(name));
170 if (it == myRecords.end()) {
171 return 0;
173 const RecordInfo &recordInfo = it->second;
174 if (recordInfo.Length == 0) {
175 return 0;
177 if (recordInfo.Section == 0) {
178 // TODO: implement
179 return 0;
181 if (recordInfo.Section > mySectionInfos.size()) {
182 return 0;
184 const SectionInfo &sectionInfo = mySectionInfos[recordInfo.Section - 1];
185 if (recordInfo.Offset + recordInfo.Length > sectionInfo.UncompressedSize) {
186 return 0;
189 return new CHMInputStream(base, sectionInfo, recordInfo.Offset, recordInfo.Length);
192 CHMFileInfo::CHMFileInfo(const std::string &fileName) : myFileName(fileName) {
195 bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) {
196 RecordMap::const_iterator it = myRecords.find(entryName);
197 if (it == myRecords.end()) {
198 return false;
200 RecordInfo recordInfo = it->second;
201 if (recordInfo.Section > mySectionInfos.size()) {
202 return false;
204 if (recordInfo.Section != 0) {
205 // TODO: ???
206 return false;
209 stream.seek(mySection0Offset + recordInfo.Offset, true);
210 return true;
213 bool CHMFileInfo::init(ZLInputStream &stream) {
215 // header start
216 if (readString(stream, 4) != "ITSF") {
217 return false;
220 unsigned long version = readUnsignedDWord(stream);
222 // DWORD total length
223 // DWORD unknown
224 // DWORD timestamp
225 // DWORD language id
226 // 0x10 bytes 1st GUID
227 // 0x10 bytes 2nd GUID
228 // QWORD section 0 offset
229 // QWORD section 0 length
230 stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false);
232 unsigned long long sectionOffset1 = readUnsignedQWord(stream);
233 unsigned long long sectionLength1 = readUnsignedQWord(stream);
234 mySection0Offset = sectionOffset1 + sectionLength1;
235 // header end
237 // additional header data start
238 if (version > 2) {
239 mySection0Offset = readUnsignedQWord(stream);
241 // additional header data end
243 stream.seek(sectionOffset1, true);
244 // header section 1 start
245 // directory header start
246 if (readString(stream, 4) != "ITSP") {
247 return false;
250 // DWORD version
251 // DWORD length
252 // DWORD 0x000A
253 // DWORD chunk size
254 // DWORD density
255 // DWORD depth
256 // DWORD root chunk number
257 // DWORD first chunk number
258 // DWORD last chunk number
259 // DWORD -1
260 stream.seek(10 * 4, false);
261 unsigned long dirChunkNumber = readUnsignedDWord(stream);
262 // ...
263 stream.seek(36, false);
264 // header section 1 end
266 size_t nextOffset = stream.offset();
267 for (unsigned long i = 0; i < dirChunkNumber; ++i) {
268 nextOffset += 4096;
269 std::string header = readString(stream, 4);
270 if (header == "PMGL") {
271 unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096;
272 stream.seek(12, false);
273 size_t startOffset = stream.offset();
274 size_t oldOffset = startOffset;
275 while (startOffset < nextOffset - quickRefAreaSize) {
276 int nameLength = readEncodedInteger(stream);
277 std::string name = readString(stream, nameLength);
278 int contentSection = readEncodedInteger(stream);
279 int offset = readEncodedInteger(stream);
280 int length = readEncodedInteger(stream);
281 if (name.substr(0, 2) != "::") {
282 name = ZLUnicodeUtil::toLower(name);
284 myRecords.insert(
285 std::pair<std::string,CHMFileInfo::RecordInfo>(
286 name,
287 CHMFileInfo::RecordInfo(contentSection, offset, length)
290 startOffset = stream.offset();
291 if (oldOffset == startOffset) {
292 break;
294 oldOffset = startOffset;
296 } else if (header == "PMGI") {
297 unsigned long quickRefAreaSize = readUnsignedDWord(stream);
298 size_t startOffset = stream.offset();
299 size_t oldOffset = startOffset;
300 while (startOffset < nextOffset - quickRefAreaSize) {
301 int nameLength = readEncodedInteger(stream);
302 std::string name = readString(stream, nameLength);
303 // chunk number
304 readEncodedInteger(stream);
305 startOffset = stream.offset();
306 if (oldOffset == startOffset) {
307 break;
309 oldOffset = startOffset;
312 stream.seek(nextOffset, true);
313 if (stream.offset() != nextOffset) {
314 break;
320 if (!moveToEntry(stream, "::DataSpace/NameList")) {
321 return false;
323 stream.seek(2, false);
324 const int sectionNumber = readUnsignedWord(stream);
325 for (int i = 0; i < sectionNumber; ++i) {
326 const int length = readUnsignedWord(stream);
327 std::string sectionName;
328 sectionName.reserve(length);
329 for (int j = 0; j < length; ++j) {
330 sectionName += (char)readUnsignedWord(stream);
332 stream.seek(2, false);
333 mySectionNames.push_back(sectionName);
338 for (unsigned int i = 1; i < mySectionNames.size(); ++i) {
339 RecordMap::const_iterator it =
340 myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content");
341 if (it == myRecords.end()) {
342 return false;
344 RecordInfo recordInfo = it->second;
345 if (recordInfo.Section != 0) {
346 return false;
348 mySectionInfos.push_back(SectionInfo());
349 SectionInfo &info = mySectionInfos.back();
350 info.Offset = mySection0Offset + recordInfo.Offset;
351 info.Length = recordInfo.Length;
353 if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) {
354 return false;
356 stream.seek(4, false);
357 std::string lzxc = readString(stream, 4);
358 if (lzxc != "LZXC") {
359 return false;
361 const int version = readUnsignedDWord(stream);
362 if ((version <= 0) || (version > 2)) {
363 return false;
365 info.ResetInterval = readUnsignedDWord(stream);
366 if (version == 1) {
367 info.ResetInterval /= 0x8000;
369 info.WindowSizeIndex = (version == 1) ? 0 : 15;
371 int ws = readUnsignedDWord(stream);
372 if (ws > 0) {
373 while ((ws & 1) == 0) {
374 ws >>= 1;
375 info.WindowSizeIndex++;
380 if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) {
381 return false;
383 stream.seek(4, false);
384 const size_t entriesNumber = readUnsignedDWord(stream);
385 if (entriesNumber == 0) {
386 return false;
388 if (entriesNumber > 2048) {
389 // file size is greater than 60 Mb
390 return false;
392 info.ResetTable.reserve(entriesNumber);
393 stream.seek(8, false);
394 info.UncompressedSize = readUnsignedQWord(stream);
395 if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) {
396 return false;
398 info.CompressedSize = readUnsignedQWord(stream);
399 stream.seek(8, false);
400 size_t previous = 0;
401 for (size_t j = 0; j < entriesNumber; ++j) {
402 size_t value = readUnsignedQWord(stream);
403 if ((j > 0) == (value <= previous)) {
404 return false;
406 info.ResetTable.push_back(value);
407 previous = value;
412 return true;
415 static std::string readNTString(ZLInputStream &stream) {
416 std::string s;
417 char c;
418 while (stream.read(&c, 1) == 1) {
419 if (c == '\0') {
420 break;
421 } else {
422 s += c;
425 return CHMReferenceCollection::fullReference("/", s);
428 bool CHMFileInfo::FileNames::empty() const {
429 return Start.empty() && TOC.empty() && Home.empty() && Index.empty();
432 CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr<ZLInputStream> base) const {
433 FileNames names;
434 shared_ptr<ZLInputStream> stringsStream = entryStream(base, "/#STRINGS");
435 if (!stringsStream.isNull() && stringsStream->open()) {
436 std::vector<std::string> fileNames;
437 int tocIndex = -1;
438 int indexIndex = -1;
439 for (int i = 0; i < 12; ++i) {
440 std::string argument = readNTString(*stringsStream);
441 if (argument.empty() || (argument[argument.length() - 1] == '/')) {
442 continue;
444 if (myRecords.find(argument) == myRecords.end()) {
445 continue;
447 if ((tocIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhc")) {
448 tocIndex = fileNames.size();
449 names.TOC = argument;
450 } else if ((indexIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhk")) {
451 indexIndex = fileNames.size();
452 names.Index = argument;
454 fileNames.push_back(argument);
456 size_t startIndex = std::max(3, std::max(tocIndex, indexIndex) + 1);
457 if (startIndex < 11) {
458 if (startIndex < fileNames.size()) {
459 names.Start = fileNames[startIndex];
461 if (startIndex + 1 < fileNames.size()) {
462 names.Home = fileNames[startIndex + 1];
465 stringsStream->close();
467 if (names.TOC.empty()) {
468 for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
469 if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) {
470 names.TOC = it->first;
471 break;
475 if (names.empty()) {
476 for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
477 if ((ZLStringUtil::stringEndsWith(it->first, ".htm")) ||
478 (ZLStringUtil::stringEndsWith(it->first, ".html"))) {
479 names.Start = it->first;
480 break;
485 return names;
488 const std::string CHMFileInfo::fileName() const {
489 return myFileName;