fbreader/src/formats/chm/CHMFile.cpp

   1 /*
   2  * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301, USA.
  18  */
  19
  20 #include <string.h>
  21
  22 #include <ZLFile.h>
  23 #include <ZLStringUtil.h>
  24 #include <ZLUnicodeUtil.h>
  25 #include <ZLInputStream.h>
  26
  27 #include "CHMFile.h"
  28 #include "CHMReferenceCollection.h"
  29
  30 #include "LZXDecompressor.h"
  31
  32 static std::string readString(ZLInputStream &stream, size_t length) {
  33         std::string string(length, ' ');
  34         stream.read(const_cast<char*>(string.data()), length);
  35         return string;
  36 }
  37
  38 static unsigned short readUnsignedWord(ZLInputStream &stream) {
  39         unsigned char buffer[2];
  40         stream.read((char*)buffer, 2);
  41         unsigned short result = buffer[1];
  42         result = result << 8;
  43         result += buffer[0];
  44         return result;
  45 }
  46
  47 static unsigned long readUnsignedDWord(ZLInputStream &stream) {
  48         unsigned long lowPart = readUnsignedWord(stream);
  49         unsigned long highPart = readUnsignedWord(stream);
  50         return (highPart << 16) + lowPart;
  51 }
  52
  53 static unsigned long long readUnsignedQWord(ZLInputStream &stream) {
  54         unsigned long long lowPart = readUnsignedDWord(stream);
  55         unsigned long long highPart = readUnsignedDWord(stream);
  56         return (highPart << 32) + lowPart;
  57 }
  58
  59 static unsigned long long readEncodedInteger(ZLInputStream &stream) {
  60         unsigned long long result = 0;
  61         char part;
  62         do {
  63                 result = result << 7;
  64                 stream.read(&part, 1);
  65                 result += part & 0x7F;
  66         } while (part & -0x80);
  67         return result;
  68 }
  69
  70 CHMInputStream::CHMInputStream(shared_ptr<ZLInputStream> base, const CHMFileInfo::SectionInfo &sectionInfo, size_t offset, size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) {
  71         myBaseStartIndex = offset / 0x8000;
  72         myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval;
  73         myBytesToSkip = offset - myBaseStartIndex * 0x8000;
  74         myOutData = new unsigned char[0x8000];
  75 }
  76
  77 CHMInputStream::~CHMInputStream() {
  78         close();
  79         delete[] myOutData;
  80 }
  81
  82 bool CHMInputStream::open() {
  83         myOffset = 0;
  84         myDoSkip = true;
  85         myBaseIndex = myBaseStartIndex;
  86         if (myDecompressor.isNull()) {
  87                 myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex);
  88         } else {
  89                 myDecompressor->reset();
  90         }
  91         myOutDataOffset = 0;
  92         myOutDataLength = 0;
  93         return true;
  94 }
  95
  96 size_t CHMInputStream::read(char *buffer, size_t maxSize) {
  97         if (myDoSkip) {
  98                 do_read(0, myBytesToSkip);
  99                 myDoSkip = false;
 100         }
 101         size_t realSize = do_read(buffer, std::min(maxSize, mySize - myOffset));
 102         myOffset += realSize;
 103         return realSize;
 104 }
 105
 106 size_t CHMInputStream::do_read(char *buffer, size_t maxSize) {
 107         size_t realSize = 0;
 108         do {
 109                 if (myOutDataLength == 0) {
 110                         if (myBaseIndex >= mySectionInfo.ResetTable.size()) {
 111                                 break;
 112                         }
 113                         const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size();
 114                         const size_t start = mySectionInfo.ResetTable[myBaseIndex];
 115                         const size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1];
 116                         myOutDataLength = isTail ? mySectionInfo.UncompressedSize % 0x8000 : 0x8000;
 117                         myOutDataOffset = 0;
 118
 119                         myInData.erase();
 120                         myInData.append(end - start, '\0');
 121                         myBase->seek(mySectionInfo.Offset + start, true);
 122                         myBase->read((char*)myInData.data(), myInData.length());
 123                         if (myBaseIndex % mySectionInfo.ResetInterval == 0) {
 124                                 myDecompressor->reset();
 125                         }
 126                         ++myBaseIndex;
 127
 128                         if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) {
 129                                 break;
 130                         }
 131                 }
 132                 const size_t partSize = std::min(myOutDataLength, maxSize);
 133                 if (buffer != 0) {
 134                         memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize);
 135                 }
 136                 maxSize -= partSize;
 137                 realSize += partSize;
 138                 myOutDataLength -= partSize;
 139                 myOutDataOffset += partSize;
 140         } while (maxSize != 0);
 141         return realSize;
 142 }
 143
 144 void CHMInputStream::close() {
 145         myDecompressor = 0;
 146 }
 147
 148 void CHMInputStream::seek(int offset, bool absoluteOffset) {
 149         if (absoluteOffset) {
 150                 offset -= myOffset;
 151         }
 152         if (offset > 0) {
 153                 read(0, offset);
 154         } else if (offset < 0) {
 155                 open();
 156                 read(0, std::max(offset + (int)myOffset, 0));
 157         }
 158 }
 159
 160 size_t CHMInputStream::offset() const {
 161         return myOffset;
 162 }
 163
 164 size_t CHMInputStream::sizeOfOpened() {
 165         return mySize;
 166 }
 167
 168 shared_ptr<ZLInputStream> CHMFileInfo::entryStream(shared_ptr<ZLInputStream> base, const std::string &name) const {
 169         RecordMap::const_iterator it = myRecords.find(ZLUnicodeUtil::toLower(name));
 170         if (it == myRecords.end()) {
 171                 return 0;
 172         }
 173         const RecordInfo &recordInfo = it->second;
 174         if (recordInfo.Length == 0) {
 175                 return 0;
 176         }
 177         if (recordInfo.Section == 0) {
 178                 // TODO: implement
 179                 return 0;
 180         }
 181         if (recordInfo.Section > mySectionInfos.size()) {
 182                 return 0;
 183         }
 184         const SectionInfo &sectionInfo = mySectionInfos[recordInfo.Section - 1];
 185         if (recordInfo.Offset + recordInfo.Length > sectionInfo.UncompressedSize) {
 186                 return 0;
 187         }
 188
 189         return new CHMInputStream(base, sectionInfo, recordInfo.Offset, recordInfo.Length);
 190 }
 191
 192 CHMFileInfo::CHMFileInfo(const std::string &fileName) : myFileName(fileName) {
 193 }
 194
 195 bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) {
 196         RecordMap::const_iterator it = myRecords.find(entryName);
 197         if (it == myRecords.end()) {
 198                 return false;
 199         }
 200         RecordInfo recordInfo = it->second;
 201         if (recordInfo.Section > mySectionInfos.size()) {
 202                 return false;
 203         }
 204         if (recordInfo.Section != 0) {
 205                 // TODO: ???
 206                 return false;
 207         }
 208
 209         stream.seek(mySection0Offset + recordInfo.Offset, true);
 210         return true;
 211 }
 212
 213 bool CHMFileInfo::init(ZLInputStream &stream) {
 214         {
 215                 // header start
 216                 if (readString(stream, 4) != "ITSF") {
 217                         return false;
 218                 }
 219
 220                 unsigned long version = readUnsignedDWord(stream);
 221
 222                 // DWORD total length
 223                 // DWORD unknown
 224                 // DWORD timestamp
 225                 // DWORD language id
 226                 // 0x10 bytes 1st GUID
 227                 // 0x10 bytes 2nd GUID
 228                 // QWORD section 0 offset
 229                 // QWORD section 0 length
 230                 stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false);
 231
 232                 unsigned long long sectionOffset1 = readUnsignedQWord(stream);
 233                 unsigned long long sectionLength1 = readUnsignedQWord(stream);
 234                 mySection0Offset = sectionOffset1 + sectionLength1;
 235                 // header end
 236
 237                 // additional header data start
 238                 if (version > 2) {
 239                         mySection0Offset = readUnsignedQWord(stream);
 240                 }
 241                 // additional header data end
 242
 243                 stream.seek(sectionOffset1, true);
 244                 // header section 1 start
 245                 // directory header start
 246                 if (readString(stream, 4) != "ITSP") {
 247                         return false;
 248                 }
 249
 250                 // DWORD version
 251                 // DWORD length
 252                 // DWORD 0x000A
 253                 // DWORD chunk size
 254                 // DWORD density
 255                 // DWORD depth
 256                 // DWORD root chunk number
 257                 // DWORD first chunk number
 258                 // DWORD last chunk number
 259                 // DWORD -1
 260                 stream.seek(10 * 4, false);
 261                 unsigned long dirChunkNumber = readUnsignedDWord(stream);
 262                 // ...
 263                 stream.seek(36, false);
 264                 // header section 1 end
 265
 266                 size_t nextOffset = stream.offset();
 267                 for (unsigned long i = 0; i < dirChunkNumber; ++i) {
 268                         nextOffset += 4096;
 269                         std::string header = readString(stream, 4);
 270                         if (header == "PMGL") {
 271                                 unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096;
 272                                 stream.seek(12, false);
 273                                 size_t startOffset = stream.offset();
 274                                 size_t oldOffset = startOffset;
 275                                 while (startOffset < nextOffset - quickRefAreaSize) {
 276                                         int nameLength = readEncodedInteger(stream);
 277                                         std::string name = readString(stream, nameLength);
 278                                         int contentSection = readEncodedInteger(stream);
 279                                         int offset = readEncodedInteger(stream);
 280                                         int length = readEncodedInteger(stream);
 281                                         if (name.substr(0, 2) != "::") {
 282                                                 name = ZLUnicodeUtil::toLower(name);
 283                                         }
 284                                         myRecords.insert(
 285                                                 std::pair<std::string,CHMFileInfo::RecordInfo>(
 286                                                         name,
 287                                                         CHMFileInfo::RecordInfo(contentSection, offset, length)
 288                                                 )
 289                                         );
 290                                         startOffset = stream.offset();
 291                                         if (oldOffset == startOffset) {
 292                                                 break;
 293                                         }
 294                                         oldOffset = startOffset;
 295                                 }
 296                         } else if (header == "PMGI") {
 297                                 unsigned long quickRefAreaSize = readUnsignedDWord(stream);
 298                                 size_t startOffset = stream.offset();
 299                                 size_t oldOffset = startOffset;
 300                                 while (startOffset < nextOffset - quickRefAreaSize) {
 301                                         int nameLength = readEncodedInteger(stream);
 302                                         std::string name = readString(stream, nameLength);
 303                                         // chunk number
 304                                         readEncodedInteger(stream);
 305                                         startOffset = stream.offset();
 306                                         if (oldOffset == startOffset) {
 307                                                 break;
 308                                         }
 309                                         oldOffset = startOffset;
 310                                 }
 311                         }
 312                         stream.seek(nextOffset, true);
 313                         if (stream.offset() != nextOffset) {
 314                                 break;
 315                         }
 316                 }
 317         }
 318
 319         {
 320                 if (!moveToEntry(stream, "::DataSpace/NameList")) {
 321                         return false;
 322                 }
 323                 stream.seek(2, false);
 324                 const int sectionNumber = readUnsignedWord(stream);
 325                 for (int i = 0; i < sectionNumber; ++i) {
 326                         const int length = readUnsignedWord(stream);
 327                         std::string sectionName;
 328                         sectionName.reserve(length);
 329                         for (int j = 0; j < length; ++j) {
 330                                 sectionName += (char)readUnsignedWord(stream);
 331                         }
 332                         stream.seek(2, false);
 333                         mySectionNames.push_back(sectionName);
 334                 }
 335         }
 336
 337         {
 338                 for (unsigned int i = 1; i < mySectionNames.size(); ++i) {
 339                         RecordMap::const_iterator it =
 340                                 myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content");
 341                         if (it == myRecords.end()) {
 342                                 return false;
 343                         }
 344                         RecordInfo recordInfo = it->second;
 345                         if (recordInfo.Section != 0) {
 346                                 return false;
 347                         }
 348                         mySectionInfos.push_back(SectionInfo());
 349                         SectionInfo &info = mySectionInfos.back();
 350                         info.Offset = mySection0Offset + recordInfo.Offset;
 351                         info.Length = recordInfo.Length;
 352
 353                         if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) {
 354                                 return false;
 355                         }
 356                         stream.seek(4, false);
 357                         std::string lzxc = readString(stream, 4);
 358                         if (lzxc != "LZXC") {
 359                                 return false;
 360                         }
 361                         const int version = readUnsignedDWord(stream);
 362                         if ((version <= 0) || (version > 2)) {
 363                                 return false;
 364                         }
 365                         info.ResetInterval = readUnsignedDWord(stream);
 366                         if (version == 1) {
 367                                 info.ResetInterval /= 0x8000;
 368                         }
 369                         info.WindowSizeIndex = (version == 1) ? 0 : 15;
 370                         {
 371                                 int ws = readUnsignedDWord(stream);
 372                                 if (ws > 0) {
 373                                         while ((ws & 1) == 0) {
 374                                                 ws >>= 1;
 375                                                 info.WindowSizeIndex++;
 376                                         }
 377                                 }
 378                         }
 379
 380                         if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) {
 381                                 return false;
 382                         }
 383                         stream.seek(4, false);
 384                         const size_t entriesNumber = readUnsignedDWord(stream);
 385                         if (entriesNumber == 0) {
 386                                 return false;
 387                         }
 388                         if (entriesNumber > 2048) {
 389                                 // file size is greater than 60 Mb
 390                                 return false;
 391                         }
 392                         info.ResetTable.reserve(entriesNumber);
 393                         stream.seek(8, false);
 394                         info.UncompressedSize = readUnsignedQWord(stream);
 395                         if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) {
 396                                 return false;
 397                         }
 398                         info.CompressedSize = readUnsignedQWord(stream);
 399                         stream.seek(8, false);
 400                         size_t previous = 0;
 401                         for (size_t j = 0; j < entriesNumber; ++j) {
 402                                 size_t value = readUnsignedQWord(stream);
 403                                 if ((j > 0) == (value <= previous)) {
 404                                         return false;
 405                                 }
 406                                 info.ResetTable.push_back(value);
 407                                 previous = value;
 408                         }
 409                 }
 410         }
 411
 412         return true;
 413 }
 414
 415 static std::string readNTString(ZLInputStream &stream) {
 416         std::string s;
 417         char c;
 418         while (stream.read(&c, 1) == 1) {
 419                 if (c == '\0') {
 420                         break;
 421                 } else {
 422                         s += c;
 423                 }
 424         }
 425         return CHMReferenceCollection::fullReference("/", s);
 426 }
 427
 428 bool CHMFileInfo::FileNames::empty() const {
 429         return Start.empty() && TOC.empty() && Home.empty() && Index.empty();
 430 }
 431
 432 CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr<ZLInputStream> base) const {
 433         FileNames names;
 434         shared_ptr<ZLInputStream> stringsStream = entryStream(base, "/#STRINGS");
 435         if (!stringsStream.isNull() && stringsStream->open()) {
 436                 std::vector<std::string> fileNames;
 437                 int tocIndex = -1;
 438                 int indexIndex = -1;
 439                 for (int i = 0; i < 12; ++i) {
 440                         std::string argument = readNTString(*stringsStream);
 441                         if (argument.empty() || (argument[argument.length() - 1] == '/')) {
 442                                 continue;
 443                         }
 444                         if (myRecords.find(argument) == myRecords.end()) {
 445                                 continue;
 446                         }
 447                         if ((tocIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhc")) {
 448                                 tocIndex = fileNames.size();
 449                                 names.TOC = argument;
 450                         } else if ((indexIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhk")) {
 451                                 indexIndex = fileNames.size();
 452                                 names.Index = argument;
 453                         }
 454                         fileNames.push_back(argument);
 455                 }
 456                 size_t startIndex = std::max(3, std::max(tocIndex, indexIndex) + 1);
 457                 if (startIndex < 11) {
 458                         if (startIndex < fileNames.size()) {
 459                                 names.Start = fileNames[startIndex];
 460                         }
 461                         if (startIndex + 1 < fileNames.size()) {
 462                                 names.Home = fileNames[startIndex + 1];
 463                         }
 464                 }
 465                 stringsStream->close();
 466         }
 467         if (names.TOC.empty()) {
 468                 for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
 469                         if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) {
 470                                 names.TOC = it->first;
 471                                 break;
 472                         }
 473                 }
 474         }
 475         if (names.empty()) {
 476                 for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) {
 477                         if ((ZLStringUtil::stringEndsWith(it->first, ".htm")) ||
 478                             (ZLStringUtil::stringEndsWith(it->first, ".html"))) {
 479                                 names.Start = it->first;
 480                                 break;
 481                         }
 482                 }
 483         }
 484
 485         return names;
 486 }
 487
 488 const std::string CHMFileInfo::fileName() const {
 489         return myFileName;
 490 }