// Source: [lbook_fbreader.git] / fbreader / src / formats / pdb / PluckerBookReader.cpp
// blob 0abeb0fd23a5aa0fea4a23fab1b13a6d23498ed8
/*
 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
#include <algorithm>
#include <cctype>
#include <iostream>
#include <vector>

#include <ZLZDecompressor.h>
#include <ZLStringUtil.h>
#include <ZLUnicodeUtil.h>
#include <ZLImage.h>
#include <ZLFileImage.h>
#include <ZLFile.h>

#include "PdbReader.h"
#include "PluckerBookReader.h"
#include "DocDecompressor.h"
#include "PluckerImages.h"
#include "../../bookmodel/BookModel.h"
37 PluckerBookReader::PluckerBookReader(const std::string &filePath, BookModel &model, const std::string &encoding) : BookReader(model), EncodedTextReader(encoding), myFilePath(filePath), myFont(FT_REGULAR) {
38 myCharBuffer = new char[65535];
39 myForcedEntry = 0;
42 PluckerBookReader::~PluckerBookReader() {
43 delete[] myCharBuffer;
46 void PluckerBookReader::safeAddControl(FBTextKind kind, bool start) {
47 if (myParagraphStarted) {
48 addControl(kind, start);
49 } else {
50 myDelayedControls.push_back(std::pair<FBTextKind,bool>(kind, start));
54 void PluckerBookReader::safeAddHyperlinkControl(const std::string &id) {
55 if (myParagraphStarted) {
56 addHyperlinkControl(INTERNAL_HYPERLINK, id);
57 } else {
58 myDelayedHyperlinks.push_back(id);
62 void PluckerBookReader::safeBeginParagraph() {
63 if (!myParagraphStarted) {
64 myParagraphStarted = true;
65 myBufferIsEmpty = true;
66 beginParagraph();
67 if (!myParagraphStored) {
68 myParagraphVector->push_back(model().bookTextModel()->paragraphsNumber() - 1);
69 myParagraphStored = true;
71 for (std::vector<std::pair<FBTextKind,bool> >::const_iterator it = myDelayedControls.begin(); it != myDelayedControls.end(); ++it) {
72 addControl(it->first, it->second);
74 if (myForcedEntry != 0) {
75 addControl(*myForcedEntry);
76 } else {
77 addControl(REGULAR, true);
79 for (std::vector<std::string>::const_iterator it = myDelayedHyperlinks.begin(); it != myDelayedHyperlinks.end(); ++it) {
80 addHyperlinkControl(INTERNAL_HYPERLINK, *it);
82 myDelayedHyperlinks.clear();
87 void PluckerBookReader::safeEndParagraph() {
88 if (myParagraphStarted) {
89 if (myBufferIsEmpty) {
90 static const std::string SPACE = " ";
91 addData(SPACE);
93 endParagraph();
94 myParagraphStarted = false;
98 void PluckerBookReader::processHeader(FontType font, bool start) {
99 if (start) {
100 enterTitle();
101 FBTextKind kind;
102 switch (font) {
103 case FT_H1:
104 kind = H1;
105 break;
106 case FT_H2:
107 kind = H2;
108 break;
109 case FT_H3:
110 kind = H3;
111 break;
112 case FT_H4:
113 kind = H4;
114 break;
115 case FT_H5:
116 kind = H5;
117 break;
118 case FT_H6:
119 default:
120 kind = H6;
121 break;
123 pushKind(kind);
124 } else {
125 popKind();
126 exitTitle();
130 void PluckerBookReader::setFont(FontType font, bool start) {
131 switch (font) {
132 case FT_REGULAR:
133 break;
134 case FT_H1:
135 case FT_H2:
136 case FT_H3:
137 case FT_H4:
138 case FT_H5:
139 case FT_H6:
140 processHeader(font, start);
141 break;
142 case FT_BOLD:
143 safeAddControl(BOLD, start);
144 break;
145 case FT_TT:
146 safeAddControl(CODE, start);
147 break;
148 case FT_SMALL:
149 break;
150 case FT_SUB:
151 safeAddControl(SUB, start);
152 break;
153 case FT_SUP:
154 safeAddControl(SUP, start);
155 break;
159 void PluckerBookReader::changeFont(FontType font) {
160 if (myFont == font) {
161 return;
163 setFont(myFont, false);
164 myFont = font;
165 setFont(myFont, true);
// Debugging aid: writes a text function to std::cerr as "code(p1, p2, ...)".
// ptr points at the function code byte; the number of parameter bytes that
// follow is code % 8.
//
// Every byte is printed as an unsigned value. Previously only the code was
// unsigned while parameters went through plain `char`, so bytes >= 0x80
// were printed as negative numbers.
static void listParameters(char *ptr) {
	const unsigned char *bytes = (const unsigned char*)ptr;
	const int argc = bytes[0] % 8;

	std::cerr << (int)bytes[0] << "(";
	for (int i = 1; i <= argc; ++i) {
		if (i > 1) {
			std::cerr << ", ";
		}
		std::cerr << (int)bytes[i];
	}
	std::cerr << ")\n";
}
// Reads the two bytes at ptr as an unsigned big-endian 16-bit number
// (ptr[0] is the high byte, ptr[1] the low byte).
static unsigned int twoBytes(char *ptr) {
	const unsigned int high = (unsigned char)ptr[0];
	const unsigned int low = (unsigned char)ptr[1];
	return (high << 8) | low;
}
188 static std::string fromNumber(unsigned int num) {
189 std::string str;
190 ZLStringUtil::appendNumber(str, num);
191 return str;
194 void PluckerBookReader::processTextFunction(char *ptr) {
195 switch ((unsigned char)*ptr) {
196 case 0x08:
197 safeAddControl(INTERNAL_HYPERLINK, false);
198 break;
199 case 0x0A:
200 safeAddHyperlinkControl(fromNumber(twoBytes(ptr + 1)));
201 break;
202 case 0x0C:
204 int sectionNum = twoBytes(ptr + 1);
205 int paragraphNum = twoBytes(ptr + 3);
206 safeAddHyperlinkControl(fromNumber(sectionNum) + '#' + fromNumber(paragraphNum));
207 myReferencedParagraphs.insert(std::pair<int,int>(sectionNum, paragraphNum));
208 break;
210 case 0x11:
211 changeFont((FontType)*(ptr + 1));
212 break;
213 case 0x1A:
214 safeBeginParagraph();
215 addImageReference(fromNumber(twoBytes(ptr + 1)));
216 break;
217 case 0x22:
218 if (!myParagraphStarted) {
219 if (myForcedEntry == 0) {
220 myForcedEntry = new ZLTextForcedControlEntry();
222 myForcedEntry->setLeftIndent(*(ptr + 1));
223 myForcedEntry->setRightIndent(*(ptr + 2));
225 break;
226 case 0x29:
227 if (!myParagraphStarted) {
228 if (myForcedEntry == 0) {
229 myForcedEntry = new ZLTextForcedControlEntry();
231 switch (*(ptr + 1)) {
232 case 0: myForcedEntry->setAlignmentType(ALIGN_LEFT); break;
233 case 1: myForcedEntry->setAlignmentType(ALIGN_RIGHT); break;
234 case 2: myForcedEntry->setAlignmentType(ALIGN_CENTER); break;
235 case 3: myForcedEntry->setAlignmentType(ALIGN_JUSTIFY); break;
238 break;
239 case 0x33: // just break line instead of horizontal rule (TODO: draw horizontal rule?)
240 safeEndParagraph();
241 break;
242 case 0x38:
243 safeEndParagraph();
244 break;
245 case 0x40:
246 safeAddControl(EMPHASIS, true);
247 break;
248 case 0x48:
249 safeAddControl(EMPHASIS, false);
250 break;
251 case 0x53: // color setting is ignored
252 break;
253 case 0x5C:
254 addImageReference(fromNumber(twoBytes(ptr + 3)));
255 break;
256 case 0x60: // underlined text is ignored
257 break;
258 case 0x68: // underlined text is ignored
259 break;
260 case 0x70: // strike-through text is ignored
261 break;
262 case 0x78: // strike-through text is ignored
263 break;
264 case 0x83:
266 char utf8[4];
267 int len = ZLUnicodeUtil::ucs2ToUtf8(utf8, twoBytes(ptr + 2));
268 safeBeginParagraph();
269 addData(std::string(utf8, len));
270 myBufferIsEmpty = false;
271 myBytesToSkip = *(ptr + 1);
272 break;
274 case 0x85: // TODO: process 4-byte unicode character
275 break;
276 case 0x8E: // custom font operations are ignored
277 case 0x8C:
278 case 0x8A:
279 case 0x88:
280 break;
281 case 0x90: // TODO: add table processing
282 case 0x92: // TODO: process table
283 case 0x97: // TODO: process table
284 break;
285 default: // this should be impossible
286 //std::cerr << "Oops... function #" << (int)(unsigned char)*ptr << "\n";
287 break;
291 void PluckerBookReader::processTextParagraph(char *start, char *end) {
292 changeFont(FT_REGULAR);
293 while (popKind()) {}
295 myParagraphStarted = false;
296 myBytesToSkip = 0;
298 char *textStart = start;
299 bool functionFlag = false;
300 for (char *ptr = start; ptr < end; ++ptr) {
301 if (*ptr == 0) {
302 functionFlag = true;
303 if (ptr > textStart) {
304 safeBeginParagraph();
305 myConvertedTextBuffer.erase();
306 myConverter->convert(myConvertedTextBuffer, textStart, ptr);
307 addData(myConvertedTextBuffer);
308 myBufferIsEmpty = false;
310 } else if (functionFlag) {
311 int paramCounter = ((unsigned char)*ptr) % 8;
312 if (end - ptr > paramCounter) {
313 processTextFunction(ptr);
314 ptr += paramCounter;
315 } else {
316 ptr = end - 1;
318 functionFlag = false;
319 if (myBytesToSkip > 0) {
320 ptr += myBytesToSkip;
321 myBytesToSkip = 0;
323 textStart = ptr + 1;
324 } else {
325 if ((unsigned char)*ptr == 0xA0) {
326 *ptr = 0x20;
328 if (!myParagraphStarted && (textStart == ptr) && isspace((unsigned char)*ptr)) {
329 ++textStart;
333 if (end > textStart) {
334 safeBeginParagraph();
335 myConvertedTextBuffer.erase();
336 myConverter->convert(myConvertedTextBuffer, textStart, end);
337 addData(myConvertedTextBuffer);
338 myBufferIsEmpty = false;
340 safeEndParagraph();
341 if (myForcedEntry != 0) {
342 delete myForcedEntry;
343 myForcedEntry = 0;
345 myDelayedControls.clear();
348 void PluckerBookReader::processTextRecord(size_t size, const std::vector<int> &pars) {
349 char *start = myCharBuffer;
350 char *end = myCharBuffer;
352 for (std::vector<int>::const_iterator it = pars.begin(); it != pars.end(); ++it) {
353 start = end;
354 end = start + *it;
355 if (end > myCharBuffer + size) {
356 return;
358 myParagraphStored = false;
359 processTextParagraph(start, end);
360 if (!myParagraphStored) {
361 myParagraphVector->push_back(-1);
366 void PluckerBookReader::readRecord(size_t recordSize) {
367 unsigned short uid;
368 PdbUtil::readUnsignedShort(*myStream, uid);
369 if (uid == 1) {
370 PdbUtil::readUnsignedShort(*myStream, myCompressionVersion);
371 } else {
372 unsigned short paragraphs;
373 PdbUtil::readUnsignedShort(*myStream, paragraphs);
375 unsigned short size;
376 PdbUtil::readUnsignedShort(*myStream, size);
378 unsigned char type;
379 myStream->read((char*)&type, 1);
381 unsigned char flags;
382 myStream->read((char*)&flags, 1);
384 switch (type) {
385 case 0: // text (TODO: found sample file and test this code)
386 case 1: // compressed text
388 std::vector<int> pars;
389 for (int i = 0; i < paragraphs; ++i) {
390 unsigned short pSize;
391 PdbUtil::readUnsignedShort(*myStream, pSize);
392 pars.push_back(pSize);
393 myStream->seek(2, false);
396 bool doProcess = false;
397 if (type == 0) {
398 doProcess = myStream->read(myCharBuffer, size) == size;
399 } else if (myCompressionVersion == 1) {
400 doProcess =
401 DocDecompressor().decompress(*myStream, myCharBuffer, recordSize - 8 - 4 * paragraphs, size) == size;
402 } else if (myCompressionVersion == 2) {
403 myStream->seek(2, false);
404 doProcess =
405 ZLZDecompressor(recordSize - 10 - 4 * paragraphs).
406 decompress(*myStream, myCharBuffer, size) == size;
408 if (doProcess) {
409 addHyperlinkLabel(fromNumber(uid));
410 myParagraphVector = &myParagraphMap[uid];
411 processTextRecord(size, pars);
412 if ((flags & 0x1) == 0) {
413 insertEndOfTextParagraph();
416 break;
418 case 2: // image
419 case 3: // compressed image
421 static const std::string mime = "image/palm";
422 ZLImage *image = 0;
423 if (type == 2) {
424 image = new ZLFileImage(mime, myFilePath, myStream->offset(), recordSize - 8);
425 } else if (myCompressionVersion == 1) {
426 image = new DocCompressedFileImage(mime, myFilePath, myStream->offset(), recordSize - 8);
427 } else if (myCompressionVersion == 2) {
428 image = new ZCompressedFileImage(mime, myFilePath, myStream->offset() + 2, recordSize - 10);
430 if (image != 0) {
431 addImage(fromNumber(uid), image);
433 break;
435 case 9: // category record is ignored
436 break;
437 case 10:
438 unsigned short typeCode;
439 PdbUtil::readUnsignedShort(*myStream, typeCode);
440 //std::cerr << "type = " << (int)type << "; ";
441 //std::cerr << "typeCode = " << typeCode << "\n";
442 break;
443 case 11: // style sheet record is ignored
444 break;
445 case 12: // font page record is ignored
446 break;
447 case 13: // TODO: process tables
448 case 14: // TODO: process tables
449 break;
450 case 15: // multiimage
452 unsigned short columns;
453 unsigned short rows;
454 PdbUtil::readUnsignedShort(*myStream, columns);
455 PdbUtil::readUnsignedShort(*myStream, rows);
456 PluckerMultiImage *image = new PluckerMultiImage(rows, columns, model().imageMap());
457 for (int i = 0; i < size / 2 - 2; ++i) {
458 unsigned short us;
459 PdbUtil::readUnsignedShort(*myStream, us);
460 image->addId(fromNumber(us));
462 addImage(fromNumber(uid), image);
463 break;
465 default:
466 //std::cerr << "type = " << (int)type << "\n";
467 break;
472 bool PluckerBookReader::readDocument() {
473 myStream = ZLFile(myFilePath).inputStream();
474 if (myStream.isNull() || !myStream->open()) {
475 return false;
478 PdbHeader header;
479 if (!header.read(myStream)) {
480 myStream->close();
481 return false;
484 setMainTextModel();
485 myFont = FT_REGULAR;
487 for (std::vector<unsigned long>::const_iterator it = header.Offsets.begin(); it != header.Offsets.end(); ++it) {
488 size_t currentOffset = myStream->offset();
489 if (currentOffset > *it) {
490 break;
492 myStream->seek(*it - currentOffset, false);
493 if (myStream->offset() != *it) {
494 break;
496 size_t recordSize = ((it != header.Offsets.end() - 1) ? *(it + 1) : myStream->sizeOfOpened()) - *it;
497 readRecord(recordSize);
499 myStream->close();
501 for (std::set<std::pair<int,int> >::const_iterator it = myReferencedParagraphs.begin(); it != myReferencedParagraphs.end(); ++it) {
502 std::map<int,std::vector<int> >::const_iterator jt = myParagraphMap.find(it->first);
503 if (jt != myParagraphMap.end()) {
504 for (unsigned int k = it->second; k < jt->second.size(); ++k) {
505 if (jt->second[k] != -1) {
506 addHyperlinkLabel(fromNumber(it->first) + '#' + fromNumber(it->second), jt->second[k]);
507 break;
512 myReferencedParagraphs.clear();
513 myParagraphMap.clear();
514 return true;