2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <ZLZDecompressor.h>
25 #include <ZLStringUtil.h>
26 #include <ZLUnicodeUtil.h>
28 #include <ZLFileImage.h>
31 #include "PdbReader.h"
32 #include "PluckerBookReader.h"
33 #include "DocDecompressor.h"
34 #include "PluckerImages.h"
35 #include "../../bookmodel/BookModel.h"
// Builds a reader for the Plucker file at filePath.
// The initializer list passes `model` to the BookReader base and `encoding` to
// the EncodedTextReader base, copies filePath into myFilePath and sets the
// current font (myFont) to FT_REGULAR.
// NOTE(review): any further body statements and the closing brace are not
// visible in this view of the file.
37 PluckerBookReader::PluckerBookReader(const std::string
&filePath
, BookModel
&model
, const std::string
&encoding
) : BookReader(model
), EncodedTextReader(encoding
), myFilePath(filePath
), myFont(FT_REGULAR
) {
// Working buffer of 65535 chars: readRecord() fills it with record text and
// processTextRecord() walks it paragraph by paragraph. Freed in the destructor.
38 myCharBuffer
= new char[65535];
// Destructor: releases the character buffer that the constructor allocated
// with new[] (hence delete[]).
42 PluckerBookReader::~PluckerBookReader() {
43 delete[] myCharBuffer
;
// Adds a style control (kind, start) that is safe to request before any
// paragraph has been opened.
//   kind  - text kind of the control
//   start - true to open the style, false to close it
// With a paragraph already started the control goes straight to addControl().
// The (kind, start) pair is also pushed onto myDelayedControls, which
// safeBeginParagraph() replays when it opens a paragraph.
// NOTE(review): the line between the two statements is not visible here —
// presumably an `else`, so the pair is queued only when no paragraph is
// started; confirm against the full source.
46 void PluckerBookReader::safeAddControl(FBTextKind kind
, bool start
) {
47 if (myParagraphStarted
) {
48 addControl(kind
, start
);
50 myDelayedControls
.push_back(std::pair
<FBTextKind
,bool>(kind
, start
));
// Adds an internal-hyperlink control for the label `id`, safe to request
// before any paragraph has been opened.
// With a paragraph already started the control is added immediately with kind
// INTERNAL_HYPERLINK. The id is also pushed onto myDelayedHyperlinks, which
// safeBeginParagraph() replays and then clears.
// NOTE(review): the line between the two statements is not visible here —
// presumably an `else`, so the id is queued only when no paragraph is started;
// confirm against the full source.
54 void PluckerBookReader::safeAddHyperlinkControl(const std::string
&id
) {
55 if (myParagraphStarted
) {
56 addHyperlinkControl(INTERNAL_HYPERLINK
, id
);
58 myDelayedHyperlinks
.push_back(id
);
// Opens a model paragraph on demand; does nothing if one is already started.
// On opening it marks the paragraph as started and empty, records the
// paragraph index for the current record (once per Plucker paragraph), then
// replays everything that was requested while no paragraph was open: the
// delayed style controls, the forced (indent/alignment) entry or a REGULAR
// control, and the delayed hyperlinks.
// NOTE(review): several lines of this function (closing braces, and at least
// one statement after the flags are set) are not visible in this view.
62 void PluckerBookReader::safeBeginParagraph() {
63 if (!myParagraphStarted
) {
64 myParagraphStarted
= true;
65 myBufferIsEmpty
= true;
// NOTE(review): one line is missing here — presumably the call that actually
// begins the paragraph in the model; confirm.
// myParagraphStored is reset by processTextRecord() before each Plucker
// paragraph, so only the first model paragraph created for it is recorded.
67 if (!myParagraphStored
) {
// Index of the last paragraph currently in the book text model.
68 myParagraphVector
->push_back(model().bookTextModel()->paragraphsNumber() - 1);
69 myParagraphStored
= true;
// Replay controls queued by safeAddControl(). The queue is not cleared here;
// processTextParagraph() clears it once the Plucker paragraph is finished.
71 for (std::vector
<std::pair
<FBTextKind
,bool> >::const_iterator it
= myDelayedControls
.begin(); it
!= myDelayedControls
.end(); ++it
) {
72 addControl(it
->first
, it
->second
);
// A forced entry (set up by processTextFunction()) is applied when present.
74 if (myForcedEntry
!= 0) {
75 addControl(*myForcedEntry
);
// NOTE(review): presumably the `else` branch of the test above — the
// separating line is not visible.
77 addControl(REGULAR
, true);
// Replay hyperlinks queued by safeAddHyperlinkControl(), then drop them.
79 for (std::vector
<std::string
>::const_iterator it
= myDelayedHyperlinks
.begin(); it
!= myDelayedHyperlinks
.end(); ++it
) {
80 addHyperlinkControl(INTERNAL_HYPERLINK
, *it
);
82 myDelayedHyperlinks
.clear();
// Closes the current paragraph if one is started and clears the started flag.
// NOTE(review): three lines after the SPACE declaration are not visible —
// presumably they add SPACE as data when the paragraph is still empty and then
// end the model paragraph; confirm against the full source.
87 void PluckerBookReader::safeEndParagraph() {
88 if (myParagraphStarted
) {
// myBufferIsEmpty is set by safeBeginParagraph() and cleared whenever text
// data is added to the paragraph.
89 if (myBufferIsEmpty
) {
90 static const std::string SPACE
= " ";
94 myParagraphStarted
= false;
// Called by setFont() with the font and the same start/end flag.
// NOTE(review): the body of this function is not visible in this view of the
// file, so its effect cannot be described from here.
98 void PluckerBookReader::processHeader(FontType font
, bool start
) {
// Emits the controls that correspond to a Plucker font.
//   font  - font being opened or closed
//   start - forwarded unchanged to every call below; changeFont() calls this
//           with false for the old font and true for the new one
// NOTE(review): the switch/case labels that select among the calls below are
// not visible in this view; which FontType triggers which control must be
// confirmed against the full source.
130 void PluckerBookReader::setFont(FontType font
, bool start
) {
// Delegated to processHeader() (presumably for header fonts — confirm).
140 processHeader(font
, start
);
143 safeAddControl(BOLD
, start
);
146 safeAddControl(CODE
, start
);
151 safeAddControl(SUB
, start
);
154 safeAddControl(SUP
, start
);
// Switches the current font to `font`.
// The request is first compared with myFont (the body of that branch is not
// visible — presumably an early return when nothing changes). Then the current
// font is closed with setFont(myFont, false) and a font is opened with
// setFont(myFont, true).
// NOTE(review): the line between the two setFont() calls is not visible —
// presumably the assignment of `font` to myFont; confirm.
159 void PluckerBookReader::changeFont(FontType font
) {
160 if (myFont
== font
) {
163 setFont(myFont
, false);
165 setFont(myFont
, true);
// Debug helper: writes a function code and its parameter bytes to std::cerr.
// The parameter count is the function code modulo 8.
// NOTE(review): the statements that advance ptr between prints, and the guard
// around the final print, are not visible in this view.
// NOTE(review): the code byte is printed through unsigned char, but parameter
// bytes are printed as (int)*ptr, so where `char` is signed, bytes >= 0x80
// appear as negative numbers.
169 static void listParameters(char *ptr) {
170 int argc = ((unsigned char)*ptr) % 8;
171 std::cerr << (int)(unsigned char)*ptr << "(";
172 for (int i = 0; i < argc - 1; ++i) {
174 std::cerr << (int)*ptr << ", ";
178 std::cerr << (int)*ptr;
// Decodes the two bytes at ptr[0] and ptr[1] as a big-endian unsigned 16-bit
// value: ptr[0] is the high byte, ptr[1] the low byte.
// Each byte is converted to unsigned char first, so a `char` with the top bit
// set never contributes a negative value.
// The caller must guarantee that two bytes are readable at ptr. The pointer is
// only read, so it is const-qualified; callers passing `char *` still work.
static unsigned int twoBytes(const char *ptr) {
	const unsigned int high = static_cast<unsigned char>(ptr[0]);
	const unsigned int low = static_cast<unsigned char>(ptr[1]);
	return 256 * high + low;
}
// Returns the text form of num, built with ZLStringUtil::appendNumber().
// Used in this file to form image ids and hyperlink labels from record uids
// and paragraph numbers.
// NOTE(review): the declaration of `str` and the return statement are not
// visible in this view of the file.
188 static std::string
fromNumber(unsigned int num
) {
190 ZLStringUtil::appendNumber(str
, num
);
// Handles one in-text function of a Plucker text record.
//   ptr - points at the function code byte; parameter bytes follow it.
// processTextParagraph() treats (code % 8) as the number of parameter bytes
// and calls this only when more than that many bytes remain before the end of
// the paragraph.
// NOTE(review): most `case` labels and `break` lines of this switch are not
// visible in this view. The notes below describe only the visible statements;
// which function code reaches each one must be confirmed against the full
// source.
194 void PluckerBookReader::processTextFunction(char *ptr
) {
195 switch ((unsigned char)*ptr
) {
// Closes an internal hyperlink.
197 safeAddControl(INTERNAL_HYPERLINK
, false);
// Opens a hyperlink to a whole record: the label is the 2-byte value at ptr+1
// (readRecord() registers records under fromNumber(uid)).
200 safeAddHyperlinkControl(fromNumber(twoBytes(ptr
+ 1)));
// Opens a hyperlink to a paragraph inside a record: label "<section>#<paragraph>".
204 int sectionNum
= twoBytes(ptr
+ 1);
205 int paragraphNum
= twoBytes(ptr
+ 3);
206 safeAddHyperlinkControl(fromNumber(sectionNum
) + '#' + fromNumber(paragraphNum
));
// Remembered so that readDocument() can create the matching label once all
// records have been read.
207 myReferencedParagraphs
.insert(std::pair
<int,int>(sectionNum
, paragraphNum
));
// Font change: the parameter byte is cast directly to FontType.
211 changeFont((FontType
)*(ptr
+ 1));
// Inline image referenced by the 2-byte id at ptr+1.
214 safeBeginParagraph();
215 addImageReference(fromNumber(twoBytes(ptr
+ 1)));
// Indents: only honoured before the paragraph is started; the forced entry is
// created lazily and applied later by safeBeginParagraph().
218 if (!myParagraphStarted
) {
219 if (myForcedEntry
== 0) {
220 myForcedEntry
= new ZLTextForcedControlEntry();
222 myForcedEntry
->setLeftIndent(*(ptr
+ 1));
223 myForcedEntry
->setRightIndent(*(ptr
+ 2));
// Alignment: likewise only honoured before the paragraph is started.
227 if (!myParagraphStarted
) {
228 if (myForcedEntry
== 0) {
229 myForcedEntry
= new ZLTextForcedControlEntry();
231 switch (*(ptr
+ 1)) {
232 case 0: myForcedEntry
->setAlignmentType(ALIGN_LEFT
); break;
233 case 1: myForcedEntry
->setAlignmentType(ALIGN_RIGHT
); break;
234 case 2: myForcedEntry
->setAlignmentType(ALIGN_CENTER
); break;
235 case 3: myForcedEntry
->setAlignmentType(ALIGN_JUSTIFY
); break;
239 case 0x33: // just break line instead of horizontal rule (TODO: draw horizontal rule?)
// Emphasis on / off.
246 safeAddControl(EMPHASIS
, true);
249 safeAddControl(EMPHASIS
, false);
251 case 0x53: // color setting is ignored
// Image reference using the 2-byte id at ptr+3 (the bytes at ptr+1..ptr+2 are
// not used by this statement).
254 addImageReference(fromNumber(twoBytes(ptr
+ 3)));
256 case 0x60: // underlined text is ignored
258 case 0x68: // underlined text is ignored
260 case 0x70: // strike-through text is ignored
262 case 0x78: // strike-through text is ignored
// 2-byte Unicode character: the code unit at ptr+2 is converted to UTF-8 and
// added as text.
// NOTE(review): the declaration of `utf8` is not visible in this view.
267 int len
= ZLUnicodeUtil::ucs2ToUtf8(utf8
, twoBytes(ptr
+ 2));
268 safeBeginParagraph();
269 addData(std::string(utf8
, len
));
270 myBufferIsEmpty
= false;
// The byte at ptr+1 is stored as a byte count that processTextParagraph()
// skips after this function.
271 myBytesToSkip
= *(ptr
+ 1);
274 case 0x85: // TODO: process 4-byte unicode character
276 case 0x8E: // custom font operations are ignored
281 case 0x90: // TODO: add table processing
282 case 0x92: // TODO: process table
283 case 0x97: // TODO: process table
285 default: // this should be impossible
286 //std::cerr << "Oops... function #" << (int)(unsigned char)*ptr << "\n";
// Converts one Plucker paragraph, the bytes in [start, end), into model
// paragraphs.
// The loop scans the bytes, flushes runs of plain text through myConverter
// into the model, and hands embedded functions to processTextFunction().
// NOTE(review): many lines of this function are not visible in this view
// (including the test that sets functionFlag, the updates of textStart and
// several closing braces). The notes below describe only the visible
// statements.
291 void PluckerBookReader::processTextParagraph(char *start
, char *end
) {
// Every paragraph starts in the regular font and with no model paragraph open.
292 changeFont(FT_REGULAR
);
295 myParagraphStarted
= false;
// textStart marks the beginning of the pending, not yet flushed, text run.
298 char *textStart
= start
;
299 bool functionFlag
= false;
300 for (char *ptr
= start
; ptr
< end
; ++ptr
) {
// Flush the text run [textStart, ptr) collected so far.
303 if (ptr
> textStart
) {
304 safeBeginParagraph();
305 myConvertedTextBuffer
.erase();
306 myConverter
->convert(myConvertedTextBuffer
, textStart
, ptr
);
307 addData(myConvertedTextBuffer
);
308 myBufferIsEmpty
= false;
// ptr is at a function code byte.
310 } else if (functionFlag
) {
// The low three bits of the code give the number of parameter bytes.
311 int paramCounter
= ((unsigned char)*ptr
) % 8;
// Only process the function if all of its parameter bytes lie inside the paragraph.
312 if (end
- ptr
> paramCounter
) {
313 processTextFunction(ptr
);
318 functionFlag
= false;
// Extra bytes requested by processTextFunction() via myBytesToSkip.
319 if (myBytesToSkip
> 0) {
320 ptr
+= myBytesToSkip
;
// Byte 0xA0 gets special treatment (the action itself is not visible in this view).
325 if ((unsigned char)*ptr
== 0xA0) {
// Whitespace at the very start of a run, before the paragraph is started
// (the action itself is not visible — presumably it is skipped).
328 if (!myParagraphStarted
&& (textStart
== ptr
) && isspace((unsigned char)*ptr
)) {
// Flush the trailing text run [textStart, end).
333 if (end
> textStart
) {
334 safeBeginParagraph();
335 myConvertedTextBuffer
.erase();
336 myConverter
->convert(myConvertedTextBuffer
, textStart
, end
);
337 addData(myConvertedTextBuffer
);
338 myBufferIsEmpty
= false;
// Per-paragraph state is dropped: the forced entry is owned here and deleted.
// NOTE(review): resetting myForcedEntry to 0 after the delete is not visible
// in this view; confirm it happens, since safeBeginParagraph() tests the
// pointer against 0.
341 if (myForcedEntry
!= 0) {
342 delete myForcedEntry
;
345 myDelayedControls
.clear();
// Splits the text held in myCharBuffer into Plucker paragraphs and processes
// each one.
//   size - number of valid bytes in myCharBuffer
//   pars - one entry per paragraph (readRecord() fills it with the pSize
//          values read from the record)
// For every paragraph exactly one entry ends up in *myParagraphVector: the
// model paragraph index pushed by safeBeginParagraph(), or -1 when the
// paragraph produced no model paragraph.
// NOTE(review): the statements that advance start/end for each entry of
// `pars`, and the body of the bounds check, are not visible in this view.
348 void PluckerBookReader::processTextRecord(size_t size
, const std::vector
<int> &pars
) {
349 char *start
= myCharBuffer
;
350 char *end
= myCharBuffer
;
352 for (std::vector
<int>::const_iterator it
= pars
.begin(); it
!= pars
.end(); ++it
) {
// Guard against a paragraph that would extend past the valid data.
355 if (end
> myCharBuffer
+ size
) {
// Reset so that safeBeginParagraph() records the first model paragraph.
358 myParagraphStored
= false;
359 processTextParagraph(start
, end
);
360 if (!myParagraphStored
) {
361 myParagraphVector
->push_back(-1);
// Reads one record from myStream and dispatches on its type.
//   recordSize - total size of the record in bytes, header included
//                (readDocument() computes it from consecutive record offsets)
// Header fields are read in this order: uid, paragraphs, size, type (1 byte),
// flags (1 byte). Text records are loaded into myCharBuffer and converted;
// image records are registered with addImage().
// NOTE(review): a number of lines are not visible in this view (declarations
// of uid/size/type/flags/rows/image/us, the `switch` line, several `case`
// labels and `if` lines, and the assignments to doProcess). The notes below
// describe only the visible statements.
366 void PluckerBookReader::readRecord(size_t recordSize
) {
368 PdbUtil::readUnsignedShort(*myStream
, uid
);
// NOTE(review): the condition guarding this read is not visible — presumably
// only one special record carries the compression version; confirm.
370 PdbUtil::readUnsignedShort(*myStream
, myCompressionVersion
);
372 unsigned short paragraphs
;
373 PdbUtil::readUnsignedShort(*myStream
, paragraphs
);
376 PdbUtil::readUnsignedShort(*myStream
, size
);
379 myStream
->read((char*)&type
, 1);
382 myStream
->read((char*)&flags
, 1);
385 case 0: // text (TODO: found sample file and test this code)
386 case 1: // compressed text
// Paragraph table: for each paragraph a 2-byte size is read and the next
// 2 bytes are skipped.
388 std::vector
<int> pars
;
389 for (int i
= 0; i
< paragraphs
; ++i
) {
390 unsigned short pSize
;
391 PdbUtil::readUnsignedShort(*myStream
, pSize
);
392 pars
.push_back(pSize
);
393 myStream
->seek(2, false);
396 bool doProcess
= false;
// Plain read of `size` bytes (presumably the uncompressed-text branch — the
// `if` line is not visible).
398 doProcess
= myStream
->read(myCharBuffer
, size
) == size
;
399 } else if (myCompressionVersion
== 1) {
// Compressed length = recordSize - 8 - 4 * paragraphs: the five header reads
// above account for 8 bytes (assuming readUnsignedShort consumes 2 bytes) and
// each paragraph table entry for 4.
401 DocDecompressor().decompress(*myStream
, myCharBuffer
, recordSize
- 8 - 4 * paragraphs
, size
) == size
;
402 } else if (myCompressionVersion
== 2) {
// Two more bytes are skipped before the compressed data, hence "- 10" below.
403 myStream
->seek(2, false);
405 ZLZDecompressor(recordSize
- 10 - 4 * paragraphs
).
406 decompress(*myStream
, myCharBuffer
, size
) == size
;
// The record becomes a hyperlink target named after its uid, and its
// paragraph indices are collected under the same uid for readDocument().
409 addHyperlinkLabel(fromNumber(uid
));
410 myParagraphVector
= &myParagraphMap
[uid
];
411 processTextRecord(size
, pars
);
// When bit 0 of flags is clear, an end-of-text paragraph is inserted after the record.
412 if ((flags
& 0x1) == 0) {
413 insertEndOfTextParagraph();
419 case 3: // compressed image
421 static const std::string mime
= "image/palm";
// The image object is given the file path plus the offset and length of the
// payload, i.e. the stream position after the 8-byte header.
424 image
= new ZLFileImage(mime
, myFilePath
, myStream
->offset(), recordSize
- 8);
425 } else if (myCompressionVersion
== 1) {
426 image
= new DocCompressedFileImage(mime
, myFilePath
, myStream
->offset(), recordSize
- 8);
427 } else if (myCompressionVersion
== 2) {
// Same 2-byte skip as for version-2 compressed text above.
428 image
= new ZCompressedFileImage(mime
, myFilePath
, myStream
->offset() + 2, recordSize
- 10);
431 addImage(fromNumber(uid
), image
);
435 case 9: // category record is ignored
438 unsigned short typeCode
;
439 PdbUtil::readUnsignedShort(*myStream
, typeCode
);
440 //std::cerr << "type = " << (int)type << "; ";
441 //std::cerr << "typeCode = " << typeCode << "\n";
443 case 11: // style sheet record is ignored
445 case 12: // font page record is ignored
447 case 13: // TODO: process tables
448 case 14: // TODO: process tables
450 case 15: // multiimage
// A multi-image is a grid of rows x columns tiles, each named by the id of
// another image record.
452 unsigned short columns
;
454 PdbUtil::readUnsignedShort(*myStream
, columns
);
455 PdbUtil::readUnsignedShort(*myStream
, rows
);
456 PluckerMultiImage
*image
= new PluckerMultiImage(rows
, columns
, model().imageMap());
// size / 2 - 2 ids: `size` bytes of 2-byte values, minus the two values read
// just above (columns and rows).
457 for (int i
= 0; i
< size
/ 2 - 2; ++i
) {
459 PdbUtil::readUnsignedShort(*myStream
, us
);
460 image
->addId(fromNumber(us
));
462 addImage(fromNumber(uid
), image
);
466 //std::cerr << "type = " << (int)type << "\n";
// Reads the whole Plucker document into the book model.
// Opens the file, reads the PDB header, feeds every record listed in
// header.Offsets to readRecord(), and finally creates "<record>#<paragraph>"
// hyperlink labels for every paragraph referenced from the text.
// Returns bool.
// NOTE(review): the return statements, the declaration of `header`, the
// bodies of the early-exit checks and some setup/cleanup lines are not
// visible in this view; the exact failure results must be confirmed against
// the full source.
472 bool PluckerBookReader::readDocument() {
473 myStream
= ZLFile(myFilePath
).inputStream();
// Failure to obtain or open the stream.
474 if (myStream
.isNull() || !myStream
->open()) {
// Failure to read the PDB header.
479 if (!header
.read(myStream
)) {
487 for (std::vector
<unsigned long>::const_iterator it
= header
.Offsets
.begin(); it
!= header
.Offsets
.end(); ++it
) {
488 size_t currentOffset
= myStream
->offset();
// The stream is only moved forward (relative seek below), so an offset that
// lies behind the current position cannot be reached.
489 if (currentOffset
> *it
) {
492 myStream
->seek(*it
- currentOffset
, false);
// Verify that the seek landed on the record start.
493 if (myStream
->offset() != *it
) {
// A record extends to the next record's offset; the last record extends to
// the end of the stream.
496 size_t recordSize
= ((it
!= header
.Offsets
.end() - 1) ? *(it
+ 1) : myStream
->sizeOfOpened()) - *it
;
497 readRecord(recordSize
);
// Resolve the (record, paragraph) pairs collected by processTextFunction().
501 for (std::set
<std::pair
<int,int> >::const_iterator it
= myReferencedParagraphs
.begin(); it
!= myReferencedParagraphs
.end(); ++it
) {
502 std::map
<int,std::vector
<int> >::const_iterator jt
= myParagraphMap
.find(it
->first
);
503 if (jt
!= myParagraphMap
.end()) {
// Start at the referenced paragraph and look for an entry that is not -1,
// i.e. a Plucker paragraph that produced a model paragraph
// (processTextRecord() stores -1 for the others).
// NOTE(review): presumably the scan stops at the first hit — the line after
// the label call is not visible; confirm.
504 for (unsigned int k
= it
->second
; k
< jt
->second
.size(); ++k
) {
505 if (jt
->second
[k
] != -1) {
506 addHyperlinkLabel(fromNumber(it
->first
) + '#' + fromNumber(it
->second
), jt
->second
[k
]);
// Per-document bookkeeping is no longer needed.
512 myReferencedParagraphs
.clear();
513 myParagraphMap
.clear();