2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 #include <ZLInputStream.h>
27 #include "RtfReader.h"
29 std::map
<std::string
, RtfCommand
*> RtfReader::ourKeywordMap
;
31 static const int rtfStreamBufferSize
= 4096;
33 RtfReader::RtfReader(const std::string
&encoding
) : EncodedTextReader(encoding
) {
36 RtfReader::~RtfReader() {
39 RtfCommand::~RtfCommand() {
42 void RtfNewParagraphCommand::run(RtfReader
&reader
, int*) const {
43 reader
.newParagraph();
46 RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property
) : myProperty(property
) {
49 void RtfFontPropertyCommand::run(RtfReader
&reader
, int *parameter
) const {
50 bool start
= (parameter
== 0) || (*parameter
!= 0);
52 case RtfReader::FONT_BOLD
:
53 if (reader
.myState
.Bold
!= start
) {
54 reader
.myState
.Bold
= start
;
55 reader
.setFontProperty(RtfReader::FONT_BOLD
);
58 case RtfReader::FONT_ITALIC
:
59 if (reader
.myState
.Italic
!= start
) {
60 reader
.myState
.Italic
= start
;
61 reader
.setFontProperty(RtfReader::FONT_ITALIC
);
64 case RtfReader::FONT_UNDERLINED
:
65 if (reader
.myState
.Underlined
!= start
) {
66 reader
.myState
.Underlined
= start
;
67 reader
.setFontProperty(RtfReader::FONT_UNDERLINED
);
73 RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment
) : myAlignment(alignment
) {
76 void RtfAlignmentCommand::run(RtfReader
&reader
, int*) const {
77 if (reader
.myState
.Alignment
!= myAlignment
) {
78 reader
.myState
.Alignment
= myAlignment
;
79 reader
.setAlignment();
83 RtfCharCommand::RtfCharCommand(const std::string
&chr
) : myChar(chr
) {
86 void RtfCharCommand::run(RtfReader
&reader
, int*) const {
87 reader
.processCharData(myChar
.data(), myChar
.length(), false);
90 RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination
) : myDestination(destination
) {
93 void RtfDestinationCommand::run(RtfReader
&reader
, int*) const {
94 if (reader
.myState
.Destination
== myDestination
) {
97 reader
.myState
.Destination
= myDestination
;
98 if (myDestination
== RtfReader::DESTINATION_PICTURE
) {
99 reader
.myState
.ReadDataAsHex
= true;
101 reader
.switchDestination(myDestination
, true);
// Handler for the style keyword. It branches on whether the reader is
// currently inside the stylesheet destination, but every statement visible in
// either branch is commented out, so as shown it has no effect.
// NOTE(review): this listing omits some original lines inside both branches;
// confirm against the full file that they contain no live code.
104 void RtfStyleCommand::run(RtfReader
&reader
, int*) const {
105 if (reader
.myState
.Destination
== RtfReader::DESTINATION_STYLESHEET
) {
106 //std::cerr << "Add style index: " << val << "\n";
108 //sprintf(style_attributes[0], "%i", val);
109 } else /*if (myState.Destination == rdsContent)*/ {
110 //std::cerr << "Set style index: " << val << "\n";
112 //sprintf(style_attributes[0], "%i", val);
116 void RtfCodepageCommand::run(RtfReader
&reader
, int *parameter
) const {
117 if (parameter
!= 0) {
118 reader
.setEncoding(*parameter
);
122 void RtfSpecialCommand::run(RtfReader
&reader
, int*) const {
123 reader
.mySpecialMode
= true;
126 RtfPictureCommand::RtfPictureCommand(const std::string
&mimeType
) : myMimeType(mimeType
) {
129 void RtfPictureCommand::run(RtfReader
&reader
, int*) const {
130 reader
.myNextImageMimeType
= myMimeType
;
133 void RtfFontResetCommand::run(RtfReader
&reader
, int*) const {
134 if (reader
.myState
.Bold
) {
135 reader
.myState
.Bold
= false;
136 reader
.setFontProperty(RtfReader::FONT_BOLD
);
138 if (reader
.myState
.Italic
) {
139 reader
.myState
.Italic
= false;
140 reader
.setFontProperty(RtfReader::FONT_ITALIC
);
142 if (reader
.myState
.Underlined
) {
143 reader
.myState
.Underlined
= false;
144 reader
.setFontProperty(RtfReader::FONT_UNDERLINED
);
148 void RtfReader::addAction(const std::string
&tag
, RtfCommand
*command
) {
149 ourKeywordMap
.insert(std::pair
<std::string
,RtfCommand
*>(tag
, command
));
152 void RtfReader::fillKeywordMap() {
153 if (ourKeywordMap
.empty()) {
154 addAction("*", new RtfSpecialCommand());
155 addAction("ansicpg", new RtfCodepageCommand());
157 static const char *keywordsToSkip
[] = {"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl", "footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep", "ftnsepc", "header", "headerf", "headerl", "headerr", "keywords", "operator", "printim", "private1", "revtim", "rxe", "subject", "tc", "txe", "xe", 0};
158 RtfCommand
*skipCommand
= new RtfDestinationCommand(RtfReader::DESTINATION_SKIP
);
159 for (const char **i
= keywordsToSkip
; *i
!= 0; ++i
) {
160 addAction(*i
, skipCommand
);
162 addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO
));
163 addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE
));
164 addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR
));
165 addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE
));
166 addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET
));
167 addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE
));
169 RtfCommand
*newParagraphCommand
= new RtfNewParagraphCommand();
170 addAction("\n", newParagraphCommand
);
171 addAction("\r", newParagraphCommand
);
172 addAction("par", newParagraphCommand
);
174 addAction("\x09", new RtfCharCommand("\x09"));
175 addAction("_", new RtfCharCommand("-"));
176 addAction("\\", new RtfCharCommand("\\"));
177 addAction("{", new RtfCharCommand("{"));
178 addAction("}", new RtfCharCommand("}"));
179 addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // •
180 addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // –
181 addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // —
182 addAction("~", new RtfCharCommand("\xC0\xA0")); //
183 addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); //  
184 addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); //  
185 addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // ‘
186 addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // ’
187 addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // “
188 addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // ”
190 addAction("jpegblip", new RtfPictureCommand("image/jpeg"));
191 addAction("pngblip", new RtfPictureCommand("image/png"));
193 addAction("s", new RtfStyleCommand());
195 addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER
));
196 addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT
));
197 addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT
));
198 addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY
));
199 addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED
));
201 addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD
));
202 addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC
));
203 addAction("u", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED
));
204 addAction("plain", new RtfFontResetCommand());
// Tokenizes the RTF stream. Each pass of the outer loop refills
// myStreamBuffer from myStream and walks the bytes with a small state
// machine held in parserState.
// Returns true when parsing was interrupted or the state stack is empty at
// the end (see the final return statement).
// NOTE(review): this listing omits many original lines (the other loop exits,
// several case labels, closing braces, some statements). The comments below
// describe only the statements that are visible here.
208 bool RtfReader::parseDocument() {
// Last enumerator of the local parser-state enum; parsing starts in
// READ_NORMAL_DATA.
214 READ_KEYWORD_PARAMETER
215 } parserState
= READ_NORMAL_DATA
;
// Accumulators for a keyword parameter / hex pair. They are declared outside
// the read loop, and the end-of-buffer code below appends partial tokens to
// them before the next read.
218 std::string parameterString
;
219 std::string hexString
;
// Stream offset at which the current picture's data begins; -1 while no
// start has been recorded.
220 int imageStartOffset
= -1;
222 while (!myIsInterrupted
) {
// [ptr, end) is the chunk just read; end is the buffer start plus the count
// returned by read().
223 const char *ptr
= myStreamBuffer
;
224 const char *end
= myStreamBuffer
+ myStream
->read(myStreamBuffer
, rtfStreamBufferSize
);
// dataStart marks the beginning of the not-yet-processed run of bytes.
228 const char *dataStart
= ptr
;
229 bool readNextChar
= true;
231 switch (parserState
) {
// Binary mode: bytes are handed over one at a time; once myBinaryDataSize is
// 0 the parser returns to normal data.
232 case READ_BINARY_DATA
:
234 processCharData(ptr
, 1);
236 if (myBinaryDataSize
== 0) {
237 parserState
= READ_NORMAL_DATA
;
240 case READ_NORMAL_DATA
:
// Flush pending text, then save the current state on the stack and clear
// ReadDataAsHex -- presumably the group-open ('{') case; its label is not
// visible here.
243 if (ptr
> dataStart
) {
244 processCharData(dataStart
, ptr
- dataStart
);
247 myStateStack
.push(myState
);
248 myState
.ReadDataAsHex
= false;
// Flush pending text -- presumably the group-close ('}') case; its label is
// not visible here.
252 if (ptr
> dataStart
) {
253 processCharData(dataStart
, ptr
- dataStart
);
// If a picture start was recorded, its size is the current position in the
// stream (stream offset corrected by ptr - end) minus the recorded start.
// The image is inserted by file name / offset / size and the marker reset.
257 if (imageStartOffset
>= 0) {
258 int imageSize
= myStream
->offset() + (ptr
- end
) - imageStartOffset
;
259 insertImage(myNextImageMimeType
, myFileName
, imageStartOffset
, imageSize
);
260 imageStartOffset
= -1;
// Empty state stack (body not visible in this listing).
263 if (myStateStack
.empty()) {
// When the saved state has a different destination, leave the current one
// and re-enter the saved one.
267 if (myState
.Destination
!= myStateStack
.top().Destination
) {
268 switchDestination(myState
.Destination
, false);
269 switchDestination(myStateStack
.top().Destination
, true);
// Remember the formatting in effect, restore the saved state from the top of
// the stack, then notify about every font property / alignment that differs.
272 bool oldItalic
= myState
.Italic
;
273 bool oldBold
= myState
.Bold
;
274 bool oldUnderlined
= myState
.Underlined
;
275 ZLTextAlignmentType oldAlignment
= myState
.Alignment
;
276 myState
= myStateStack
.top();
279 if (myState
.Italic
!= oldItalic
) {
280 setFontProperty(RtfReader::FONT_ITALIC
);
282 if (myState
.Bold
!= oldBold
) {
283 setFontProperty(RtfReader::FONT_BOLD
);
285 if (myState
.Underlined
!= oldUnderlined
) {
286 setFontProperty(RtfReader::FONT_UNDERLINED
);
288 if (myState
.Alignment
!= oldAlignment
) {
// Flush pending text and start reading a keyword -- presumably the backslash
// case; its label is not visible here.
295 if (ptr
> dataStart
) {
296 processCharData(dataStart
, ptr
- dataStart
);
300 parserState
= READ_KEYWORD
;
303 case 0x0a: // cr and lf are noise characters...
304 if (ptr
> dataStart
) {
305 processCharData(dataStart
, ptr
- dataStart
);
// While data is read as hex, record where it starts in the stream
// (only once per picture: the marker must still be -1).
310 if (myState
.ReadDataAsHex
) {
311 if (imageStartOffset
== -1) {
312 imageStartOffset
= myStream
->offset() + (ptr
- end
);
// Hex escape: once hexString holds two characters, they are converted with
// strtol(base 16) into one byte, which is passed on as character data.
318 case READ_HEX_SYMBOL
:
320 if (hexString
.size() == 2) {
321 char ch
= strtol(hexString
.c_str(), 0, 16);
323 processCharData(&ch
, 1);
324 parserState
= READ_NORMAL_DATA
;
// Keyword reading (case label not visible here): a non-alphabetic character
// ends the keyword.
329 if (!isalpha(*ptr
)) {
// Nothing collected yet: this is a one-character control symbol; it either
// starts a hex escape or is processed as a keyword right away (the condition
// that selects between the two is not visible here).
330 if ((ptr
== dataStart
) && (keyword
.empty())) {
332 parserState
= READ_HEX_SYMBOL
;
335 processKeyword(keyword
);
336 parserState
= READ_NORMAL_DATA
;
// Otherwise complete the keyword; '-' or a digit begins its numeric
// parameter, anything else terminates it. A terminating space is consumed
// (dataStart moves past it), any other terminator is left for normal data.
340 keyword
.append(dataStart
, ptr
- dataStart
);
341 if ((*ptr
== '-') || isdigit(*ptr
)) {
343 parserState
= READ_KEYWORD_PARAMETER
;
345 readNextChar
= *ptr
== ' ';
346 processKeyword(keyword
);
347 parserState
= READ_NORMAL_DATA
;
348 dataStart
= readNextChar
? ptr
+ 1 : ptr
;
// Numeric parameter: the first non-digit ends it and the text is converted
// with atoi(). "bin" with a positive count switches to binary mode for that
// many bytes; every other keyword is dispatched with the parsed value.
353 case READ_KEYWORD_PARAMETER
:
354 if (!isdigit(*ptr
)) {
355 parameterString
.append(dataStart
, ptr
- dataStart
);
356 int parameter
= atoi(parameterString
.c_str());
357 parameterString
.erase();
358 readNextChar
= *ptr
== ' ';
359 if ((keyword
== "bin") && (parameter
> 0)) {
360 myBinaryDataSize
= parameter
;
361 parserState
= READ_BINARY_DATA
;
363 processKeyword(keyword
, ¶meter
);
364 parserState
= READ_NORMAL_DATA
;
366 dataStart
= readNextChar
? ptr
+ 1 : ptr
;
// End of buffer: whatever is left after dataStart is flushed as text, or
// appended to the partial keyword / parameter being collected.
376 if (dataStart
< end
) {
377 switch (parserState
) {
378 case READ_NORMAL_DATA
:
379 processCharData(dataStart
, end
- dataStart
);
381 keyword
.append(dataStart
, end
- dataStart
);
383 case READ_KEYWORD_PARAMETER
:
384 parameterString
.append(dataStart
, end
- dataStart
);
392 return myIsInterrupted
|| myStateStack
.empty();
// Dispatches one control word: looks `keyword` up in ourKeywordMap and runs
// the registered command, passing this reader and the optional numeric
// `parameter` pointer through to it.
// NOTE(review): this listing omits original lines in the branches below (the
// body of the DESTINATION_SKIP check and part of the unknown-keyword branch);
// the comments describe only what is visible.
395 void RtfReader::processKeyword(const std::string
&keyword
, int *parameter
) {
// The special-mode flag applies to a single keyword: its value is saved and
// the flag is cleared before anything else happens.
396 bool wasSpecialMode
= mySpecialMode
;
397 mySpecialMode
= false;
// Check for a skipped destination (branch body not visible here).
398 if (myState
.Destination
== RtfReader::DESTINATION_SKIP
) {
402 std::map
<std::string
, RtfCommand
*>::const_iterator it
= ourKeywordMap
.find(keyword
);
// Unknown keyword: the visible statement switches the current destination to
// DESTINATION_SKIP. NOTE(review): presumably this is conditional on
// wasSpecialMode -- the guarding line is not visible; confirm.
404 if (it
== ourKeywordMap
.end()) {
406 myState
.Destination
= RtfReader::DESTINATION_SKIP
;
410 it
->second
->run(*this, parameter
);
414 void RtfReader::processCharData(const char *data
, size_t len
, bool convert
) {
415 if (myState
.Destination
!= RtfReader::DESTINATION_SKIP
) {
416 addCharData(data
, len
, convert
);
420 void RtfReader::interrupt() {
421 myIsInterrupted
= true;
424 bool RtfReader::readDocument(const std::string
&fileName
) {
425 myFileName
= fileName
;
426 myStream
= ZLFile(fileName
).inputStream();
427 if (myStream
.isNull() || !myStream
->open()) {
433 myStreamBuffer
= new char[rtfStreamBufferSize
];
435 myIsInterrupted
= false;
437 mySpecialMode
= false;
439 myState
.Alignment
= ALIGN_UNDEFINED
;
440 myState
.Italic
= false;
441 myState
.Bold
= false;
442 myState
.Underlined
= false;
443 myState
.Destination
= RtfReader::DESTINATION_NONE
;
444 myState
.ReadDataAsHex
= false;
446 bool code
= parseDocument();
448 while (!myStateStack
.empty()) {
452 delete[] myStreamBuffer
;