2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include <ZLStringUtil.h>
24 #include <ZLUnicodeUtil.h>
25 #include <ZLInputStream.h>
28 #include "CHMReferenceCollection.h"
30 #include "LZXDecompressor.h"
// Reads `length` bytes from `stream` into a std::string that is created with
// `length` space characters and then used as the destination buffer.
// NOTE(review): the leading numbers (32, 33, ...) look like line numbers left
// over from a source listing, and neither a return statement nor the closing
// brace is visible in this excerpt -- confirm against the pristine file.
32 static std::string
readString(ZLInputStream
&stream
, size_t length
) {
33 std::string
string(length
, ' ');
// const_cast because data() is being used as a writable buffer for the read.
34 stream
.read(const_cast<char*>(string
.data()), length
);
// Reads two bytes from `stream` into a local buffer and starts building an
// unsigned short from the second byte (buffer[1]).
// NOTE(review): the statements that combine buffer[0] and return the value
// are not visible in this excerpt; presumably the word is little-endian
// (buffer[1] is the high byte) -- confirm against the pristine file.
38 static unsigned short readUnsignedWord(ZLInputStream
&stream
) {
39 unsigned char buffer
[2];
40 stream
.read((char*)buffer
, 2);
41 unsigned short result
= buffer
[1];
47 static unsigned long readUnsignedDWord(ZLInputStream
&stream
) {
48 unsigned long lowPart
= readUnsignedWord(stream
);
49 unsigned long highPart
= readUnsignedWord(stream
);
50 return (highPart
<< 16) + lowPart
;
53 static unsigned long long readUnsignedQWord(ZLInputStream
&stream
) {
54 unsigned long long lowPart
= readUnsignedDWord(stream
);
55 unsigned long long highPart
= readUnsignedDWord(stream
);
56 return (highPart
<< 32) + lowPart
;
// Decodes a variable-length integer from `stream`, one byte per iteration:
// the low 7 bits of each byte are added to `result`, and the loop continues
// while the tested high bits of the byte are set.
// NOTE(review): the declaration of `part`, the loop head, any shift of
// `result` between bytes, and the return statement are not visible in this
// excerpt -- confirm against the pristine file.
59 static unsigned long long readEncodedInteger(ZLInputStream
&stream
) {
60 unsigned long long result
= 0;
64 stream
.read(&part
, 1);
// Keep only the 7 payload bits of the byte just read.
65 result
+= part
& 0x7F;
// -0x80 has bit 7 and every higher bit set, so after integral promotion this
// is non-zero exactly when bit 7 of the byte is set (assuming `part` is a
// char, whether signed or unsigned -- its declaration is not visible here).
66 } while (part
& -0x80);
70 CHMInputStream::CHMInputStream(shared_ptr
<ZLInputStream
> base
, const CHMFileInfo::SectionInfo
§ionInfo
, size_t offset
, size_t size
) : myBase(base
), mySectionInfo(sectionInfo
), mySize(size
) {
71 myBaseStartIndex
= offset
/ 0x8000;
72 myBaseStartIndex
-= myBaseStartIndex
% sectionInfo
.ResetInterval
;
73 myBytesToSkip
= offset
- myBaseStartIndex
* 0x8000;
74 myOutData
= new unsigned char[0x8000];
// Destructor of CHMInputStream.
// NOTE(review): the body is not visible in this excerpt; the constructor
// allocates myOutData with new[], so confirm it is released with delete[] here.
77 CHMInputStream::~CHMInputStream() {
// Prepares the stream for reading. Visible steps: restart block iteration at
// myBaseStartIndex, create the LZX decompressor from the section's
// WindowSizeIndex if none exists yet, and reset the decompressor.
// NOTE(review): several original lines (including the return statement and
// the line between the creation and the reset) are not visible in this
// excerpt, so the exact branch structure cannot be confirmed from here.
82 bool CHMInputStream::open() {
85 myBaseIndex
= myBaseStartIndex
;
// Lazily create the decompressor on first use.
86 if (myDecompressor
.isNull()) {
87 myDecompressor
= new LZXDecompressor(mySectionInfo
.WindowSizeIndex
);
89 myDecompressor
->reset();
// Public read entry point: requests at most `maxSize` bytes, clamped to the
// bytes remaining in this entry (mySize - myOffset), from do_read() and
// advances myOffset by the number of bytes do_read() reports.
// NOTE(review): the condition guarding the skip step and the return
// statement are not visible in this excerpt.
96 size_t CHMInputStream::read(char *buffer
, size_t maxSize
) {
// Consumes myBytesToSkip bytes with a null destination pointer; presumably
// do_read() skips the copy for a null buffer (that guard is not visible here).
98 do_read(0, myBytesToSkip
);
101 size_t realSize
= do_read(buffer
, std::min(maxSize
, mySize
- myOffset
));
102 myOffset
+= realSize
;
// Core read loop. When the decompressed buffer is empty it loads the next
// compressed block (located through the section's ResetTable), decompresses
// it into myOutData, and then copies up to `maxSize` bytes to `buffer`.
// NOTE(review): many original lines are missing from this excerpt (loop head,
// declaration of realSize, error returns, updates of maxSize/myBaseIndex,
// the final return), so only the visible statements are described below.
106 size_t CHMInputStream::do_read(char *buffer
, size_t maxSize
) {
// Nothing left from the previously decompressed block: fetch another one.
109 if (myOutDataLength
== 0) {
// No more blocks listed in the reset table.
110 if (myBaseIndex
>= mySectionInfo
.ResetTable
.size()) {
// The last table entry has no successor, so its compressed end is the
// section's CompressedSize instead of the next table entry.
113 const bool isTail
= myBaseIndex
+ 1 == mySectionInfo
.ResetTable
.size();
114 const size_t start
= mySectionInfo
.ResetTable
[myBaseIndex
];
115 const size_t end
= isTail
? mySectionInfo
.CompressedSize
: mySectionInfo
.ResetTable
[myBaseIndex
+ 1];
// Every block decompresses to 0x8000 bytes except possibly the last one.
// NOTE(review): the tail expression is 0 when UncompressedSize is an exact
// multiple of 0x8000; the following original lines are not visible --
// confirm that case is handled.
116 myOutDataLength
= isTail
? mySectionInfo
.UncompressedSize
% 0x8000 : 0x8000;
// Grow the input buffer by the compressed block size (zero-filled), then
// read the compressed bytes from the absolute position in the base stream.
120 myInData
.append(end
- start
, '\0');
121 myBase
->seek(mySectionInfo
.Offset
+ start
, true);
122 myBase
->read((char*)myInData
.data(), myInData
.length());
// The decompressor is reset at every block whose index is a multiple of
// ResetInterval.
123 if (myBaseIndex
% mySectionInfo
.ResetInterval
== 0) {
124 myDecompressor
->reset();
128 if (!myDecompressor
->decompress(myInData
, myOutData
, myOutDataLength
)) {
// Hand out as much of the decompressed block as the caller still wants.
132 const size_t partSize
= std::min(myOutDataLength
, maxSize
);
134 memcpy(buffer
+ realSize
, myOutData
+ myOutDataOffset
, partSize
);
137 realSize
+= partSize
;
138 myOutDataLength
-= partSize
;
139 myOutDataOffset
+= partSize
;
140 } while (maxSize
!= 0);
// Closes the stream.
// NOTE(review): the body is not visible in this excerpt.
144 void CHMInputStream::close() {
// Moves the read position by `offset`, or to `offset` when `absoluteOffset`
// is true. The visible backward branch repositions by calling read() with a
// null destination.
// NOTE(review): the body of the absoluteOffset branch, the forward branch and
// the statement preceding the read() call below are not visible in this
// excerpt -- confirm the full logic against the pristine file.
148 void CHMInputStream::seek(int offset
, bool absoluteOffset
) {
149 if (absoluteOffset
) {
154 } else if (offset
< 0) {
// Target position is clamped at 0 so the stream never seeks before its start.
156 read(0, std::max(offset
+ (int)myOffset
, 0));
// Returns the current position of the stream as a size_t.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns myOffset -- confirm.
160 size_t CHMInputStream::offset() const {
// Returns the size of the opened entry as a size_t.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns mySize -- confirm.
164 size_t CHMInputStream::sizeOfOpened() {
// Looks up `name` (lower-cased) in myRecords and, when every visible check
// passes, returns a new CHMInputStream over that record inside its section.
// NOTE(review): the bodies of all guard branches below are not visible in
// this excerpt (presumably each returns a null pointer), and `§ionInfo`
// appears to be a mis-encoded `&sectionInfo` -- confirm against the pristine
// file.
168 shared_ptr
<ZLInputStream
> CHMFileInfo::entryStream(shared_ptr
<ZLInputStream
> base
, const std::string
&name
) const {
169 RecordMap::const_iterator it
= myRecords
.find(ZLUnicodeUtil::toLower(name
));
// Guard: unknown entry name.
170 if (it
== myRecords
.end()) {
173 const RecordInfo
&recordInfo
= it
->second
;
// Guard: empty record.
174 if (recordInfo
.Length
== 0) {
// Guard: record stored in section 0.
177 if (recordInfo
.Section
== 0) {
// Guard: section number beyond the parsed section infos (mySectionInfos is
// indexed with Section - 1 below).
181 if (recordInfo
.Section
> mySectionInfos
.size()) {
184 const SectionInfo
§ionInfo
= mySectionInfos
[recordInfo
.Section
- 1];
// Guard: record extends past the section's uncompressed data.
185 if (recordInfo
.Offset
+ recordInfo
.Length
> sectionInfo
.UncompressedSize
) {
189 return new CHMInputStream(base
, sectionInfo
, recordInfo
.Offset
, recordInfo
.Length
);
192 CHMFileInfo::CHMFileInfo(const std::string
&fileName
) : myFileName(fileName
) {
// Positions `stream` at the start of the record named `entryName`. The
// visible seek targets mySection0Offset + recordInfo.Offset, i.e. it
// addresses data relative to section 0. Note that `entryName` is looked up
// as given (no lower-casing is applied here).
// NOTE(review): the bodies of the guard branches and the return statements
// are not visible in this excerpt.
195 bool CHMFileInfo::moveToEntry(ZLInputStream
&stream
, const std::string
&entryName
) {
196 RecordMap::const_iterator it
= myRecords
.find(entryName
);
// Guard: unknown entry name.
197 if (it
== myRecords
.end()) {
200 RecordInfo recordInfo
= it
->second
;
// Guard: section number beyond the parsed section infos.
201 if (recordInfo
.Section
> mySectionInfos
.size()) {
// Guard: record is not stored in section 0.
204 if (recordInfo
.Section
!= 0) {
209 stream
.seek(mySection0Offset
+ recordInfo
.Offset
, true);
// Parses the structure of a CHM file from `stream`. Visible stages:
//   1. check the "ITSF" signature and read the version and the position of
//      header section 1;
//   2. check the "ITSP" signature and walk the directory chunks ("PMGL"
//      listing chunks and "PMGI" index chunks), building one RecordInfo per
//      listed entry;
//   3. read the section names from "::DataSpace/NameList";
//   4. for every section after the first, read its Content record, its
//      "LZXC" control data and its reset table into a SectionInfo.
// NOTE(review): many original lines are missing from this excerpt (every
// return statement, several conditions and closing braces), so the comments
// below describe only the statements that are visible.
213 bool CHMFileInfo::init(ZLInputStream
&stream
) {
216 if (readString(stream
, 4) != "ITSF") {
220 unsigned long version
= readUnsignedDWord(stream
);
222 // DWORD total length
226 // 0x10 bytes 1st GUID
227 // 0x10 bytes 2nd GUID
228 // QWORD section 0 offset
229 // QWORD section 0 length
// Skips the fields listed above: 4 DWORDs, 2 GUIDs and 2 QWORDs.
230 stream
.seek(4 * 4 + 2 * 0x10 + 2 * 8, false);
232 unsigned long long sectionOffset1
= readUnsignedQWord(stream
);
233 unsigned long long sectionLength1
= readUnsignedQWord(stream
);
// Default: content starts right after header section 1.
234 mySection0Offset
= sectionOffset1
+ sectionLength1
;
237 // additional header data start
// Overrides the default with an explicit offset read from the file.
// NOTE(review): the condition guarding this read is not visible here.
239 mySection0Offset
= readUnsignedQWord(stream
);
241 // additional header data end
243 stream
.seek(sectionOffset1
, true);
244 // header section 1 start
245 // directory header start
246 if (readString(stream
, 4) != "ITSP") {
256 // DWORD root chunk number
257 // DWORD first chunk number
258 // DWORD last chunk number
260 stream
.seek(10 * 4, false);
261 unsigned long dirChunkNumber
= readUnsignedDWord(stream
);
263 stream
.seek(36, false);
264 // header section 1 end
// Walk the directory chunks one by one.
// NOTE(review): the statement that advances nextOffset per chunk is not
// visible in this excerpt.
266 size_t nextOffset
= stream
.offset();
267 for (unsigned long i
= 0; i
< dirChunkNumber
; ++i
) {
269 std::string header
= readString(stream
, 4);
270 if (header
== "PMGL") {
// NOTE(review): the value is reduced modulo 4096 here but not in the
// "PMGI" branch below; `nextOffset - quickRefAreaSize` is unsigned, so
// it wraps around if the subtrahend is larger -- confirm both.
271 unsigned long quickRefAreaSize
= readUnsignedDWord(stream
) % 4096;
272 stream
.seek(12, false);
273 size_t startOffset
= stream
.offset();
274 size_t oldOffset
= startOffset
;
// Read entries until the quick-reference area at the end of the chunk.
275 while (startOffset
< nextOffset
- quickRefAreaSize
) {
// NOTE(review): readEncodedInteger() returns unsigned long long; the
// values are narrowed to int here.
276 int nameLength
= readEncodedInteger(stream
);
277 std::string name
= readString(stream
, nameLength
);
278 int contentSection
= readEncodedInteger(stream
);
279 int offset
= readEncodedInteger(stream
);
280 int length
= readEncodedInteger(stream
);
// Names starting with "::" are kept as-is; all others are lower-cased.
281 if (name
.substr(0, 2) != "::") {
282 name
= ZLUnicodeUtil::toLower(name
);
// Builds the (name, RecordInfo) pair; presumably it is inserted into
// myRecords (the enclosing call is not visible here).
285 std::pair
<std::string
,CHMFileInfo::RecordInfo
>(
287 CHMFileInfo::RecordInfo(contentSection
, offset
, length
)
// Detects a stream that made no progress during this iteration.
290 startOffset
= stream
.offset();
291 if (oldOffset
== startOffset
) {
294 oldOffset
= startOffset
;
296 } else if (header
== "PMGI") {
297 unsigned long quickRefAreaSize
= readUnsignedDWord(stream
);
298 size_t startOffset
= stream
.offset();
299 size_t oldOffset
= startOffset
;
// Index entries are read and discarded; nothing visible stores them.
300 while (startOffset
< nextOffset
- quickRefAreaSize
) {
301 int nameLength
= readEncodedInteger(stream
);
302 std::string name
= readString(stream
, nameLength
);
304 readEncodedInteger(stream
);
305 startOffset
= stream
.offset();
306 if (oldOffset
== startOffset
) {
309 oldOffset
= startOffset
;
// Jump to the start of the next chunk and verify the seek succeeded.
312 stream
.seek(nextOffset
, true);
313 if (stream
.offset() != nextOffset
) {
// Section names: a 16-bit count followed by length-prefixed strings of
// 16-bit characters, each character truncated to char.
320 if (!moveToEntry(stream
, "::DataSpace/NameList")) {
323 stream
.seek(2, false);
324 const int sectionNumber
= readUnsignedWord(stream
);
325 for (int i
= 0; i
< sectionNumber
; ++i
) {
326 const int length
= readUnsignedWord(stream
);
327 std::string sectionName
;
328 sectionName
.reserve(length
);
329 for (int j
= 0; j
< length
; ++j
) {
330 sectionName
+= (char)readUnsignedWord(stream
);
// Skips two bytes after each name (presumably the 16-bit terminator).
332 stream
.seek(2, false);
333 mySectionNames
.push_back(sectionName
);
// Section descriptions; iteration starts at index 1, so the first section
// name is not processed here.
338 for (unsigned int i
= 1; i
< mySectionNames
.size(); ++i
) {
339 RecordMap::const_iterator it
=
340 myRecords
.find("::DataSpace/Storage/" + mySectionNames
[i
] + "/Content");
341 if (it
== myRecords
.end()) {
344 RecordInfo recordInfo
= it
->second
;
345 if (recordInfo
.Section
!= 0) {
348 mySectionInfos
.push_back(SectionInfo());
349 SectionInfo
&info
= mySectionInfos
.back();
350 info
.Offset
= mySection0Offset
+ recordInfo
.Offset
;
351 info
.Length
= recordInfo
.Length
;
353 if (!moveToEntry(stream
, "::DataSpace/Storage/" + mySectionNames
[i
] + "/ControlData")) {
356 stream
.seek(4, false);
357 std::string lzxc
= readString(stream
, 4);
358 if (lzxc
!= "LZXC") {
// Only control data versions 1 and 2 are accepted.
361 const int version
= readUnsignedDWord(stream
);
362 if ((version
<= 0) || (version
> 2)) {
365 info
.ResetInterval
= readUnsignedDWord(stream
);
// Converts the interval to units of 0x8000 bytes.
// NOTE(review): the condition guarding this division is not visible here;
// a resulting value of 0 would later be used as a modulo divisor.
367 info
.ResetInterval
/= 0x8000;
369 info
.WindowSizeIndex
= (version
== 1) ? 0 : 15;
// Adds the number of trailing zero bits of `ws` to WindowSizeIndex.
// NOTE(review): the shift of `ws` and any guard against ws == 0 are not
// visible in this excerpt.
371 int ws
= readUnsignedDWord(stream
);
373 while ((ws
& 1) == 0) {
375 info
.WindowSizeIndex
++;
380 if (!moveToEntry(stream
, "::DataSpace/Storage/" + mySectionNames
[i
] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) {
383 stream
.seek(4, false);
384 const size_t entriesNumber
= readUnsignedDWord(stream
);
385 if (entriesNumber
== 0) {
388 if (entriesNumber
> 2048) {
389 // file size is greater than 60 Mb
392 info
.ResetTable
.reserve(entriesNumber
);
393 stream
.seek(8, false);
394 info
.UncompressedSize
= readUnsignedQWord(stream
);
// Consistency check: the number of 0x8000-byte blocks implied by
// UncompressedSize must equal the number of reset table entries.
395 if ((info
.UncompressedSize
- 1) / 0x8000 != entriesNumber
- 1) {
398 info
.CompressedSize
= readUnsignedQWord(stream
);
399 stream
.seek(8, false);
401 for (size_t j
= 0; j
< entriesNumber
; ++j
) {
402 size_t value
= readUnsignedQWord(stream
);
// True (entry rejected) when j == 0 and value > previous, or when j > 0
// and value <= previous.
// NOTE(review): the declaration and update of `previous` are not visible
// in this excerpt.
403 if ((j
> 0) == (value
<= previous
)) {
406 info
.ResetTable
.push_back(value
);
// Reads characters from `stream` one byte at a time while reads succeed and
// returns the collected text passed through
// CHMReferenceCollection::fullReference() with "/" as the first argument.
// NOTE(review): the declarations of `s` and `c` and the loop body (presumably
// it stops at a NUL byte, as the name suggests) are not visible in this
// excerpt.
415 static std::string
readNTString(ZLInputStream
&stream
) {
418 while (stream
.read(&c
, 1) == 1) {
425 return CHMReferenceCollection::fullReference("/", s
);
428 bool CHMFileInfo::FileNames::empty() const {
429 return Start
.empty() && TOC
.empty() && Home
.empty() && Index
.empty();
// Determines the names of the table-of-contents (.hhc), index (.hhk), start
// and home files. Visible strategy: read up to 12 strings from the
// "/#STRINGS" entry and classify those that exist in myRecords; afterwards
// fall back to scanning myRecords by file extension.
// NOTE(review): the declarations of `names`, `tocIndex` and `indexIndex`,
// the bodies of the skip branches, several conditions and the return
// statement are not visible in this excerpt.
432 CHMFileInfo::FileNames
CHMFileInfo::sectionNames(shared_ptr
<ZLInputStream
> base
) const {
434 shared_ptr
<ZLInputStream
> stringsStream
= entryStream(base
, "/#STRINGS");
435 if (!stringsStream
.isNull() && stringsStream
->open()) {
436 std::vector
<std::string
> fileNames
;
439 for (int i
= 0; i
< 12; ++i
) {
440 std::string argument
= readNTString(*stringsStream
);
// Tests for empty strings and strings ending in '/'.
441 if (argument
.empty() || (argument
[argument
.length() - 1] == '/')) {
// Tests for names that are not records of this file.
444 if (myRecords
.find(argument
) == myRecords
.end()) {
// The first ".hhc" / ".hhk" name found is recorded together with its
// position in fileNames.
447 if ((tocIndex
== -1) && ZLStringUtil::stringEndsWith(argument
, ".hhc")) {
448 tocIndex
= fileNames
.size();
449 names
.TOC
= argument
;
450 } else if ((indexIndex
== -1) && ZLStringUtil::stringEndsWith(argument
, ".hhk")) {
451 indexIndex
= fileNames
.size();
452 names
.Index
= argument
;
454 fileNames
.push_back(argument
);
// Start/Home candidates are taken from the position after the later of the
// TOC and index entries, but never from a position below 3.
456 size_t startIndex
= std::max(3, std::max(tocIndex
, indexIndex
) + 1);
457 if (startIndex
< 11) {
458 if (startIndex
< fileNames
.size()) {
459 names
.Start
= fileNames
[startIndex
];
461 if (startIndex
+ 1 < fileNames
.size()) {
462 names
.Home
= fileNames
[startIndex
+ 1];
465 stringsStream
->close();
// Fallback: take a TOC name from any record ending in ".hhc".
467 if (names
.TOC
.empty()) {
468 for (RecordMap::const_iterator it
= myRecords
.begin(); it
!= myRecords
.end(); ++it
) {
469 if (ZLStringUtil::stringEndsWith(it
->first
, ".hhc")) {
470 names
.TOC
= it
->first
;
// Fallback: take a start page from any record ending in ".htm" or ".html".
// NOTE(review): the condition guarding this loop is not visible here.
476 for (RecordMap::const_iterator it
= myRecords
.begin(); it
!= myRecords
.end(); ++it
) {
477 if ((ZLStringUtil::stringEndsWith(it
->first
, ".htm")) ||
478 (ZLStringUtil::stringEndsWith(it
->first
, ".html"))) {
479 names
.Start
= it
->first
;
488 const std::string
CHMFileInfo::fileName() const {