gui
[lbook_fbreader.git] / fbreader / src / formats / rtf / RtfReader.cpp
blob1de87fd33a9adfe04b5636311e463faa398ec036
1 /*
2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 #include <stdlib.h>
22 #include <cctype>
24 #include <ZLFile.h>
25 #include <ZLInputStream.h>
27 #include "RtfReader.h"
29 std::map<std::string, RtfCommand*> RtfReader::ourKeywordMap;
31 static const int rtfStreamBufferSize = 4096;
33 RtfReader::RtfReader(const std::string &encoding) : EncodedTextReader(encoding) {
36 RtfReader::~RtfReader() {
39 RtfCommand::~RtfCommand() {
42 void RtfNewParagraphCommand::run(RtfReader &reader, int*) const {
43 reader.newParagraph();
46 RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property) : myProperty(property) {
49 void RtfFontPropertyCommand::run(RtfReader &reader, int *parameter) const {
50 bool start = (parameter == 0) || (*parameter != 0);
51 switch (myProperty) {
52 case RtfReader::FONT_BOLD:
53 if (reader.myState.Bold != start) {
54 reader.myState.Bold = start;
55 reader.setFontProperty(RtfReader::FONT_BOLD);
57 break;
58 case RtfReader::FONT_ITALIC:
59 if (reader.myState.Italic != start) {
60 reader.myState.Italic = start;
61 reader.setFontProperty(RtfReader::FONT_ITALIC);
63 break;
64 case RtfReader::FONT_UNDERLINED:
65 if (reader.myState.Underlined != start) {
66 reader.myState.Underlined = start;
67 reader.setFontProperty(RtfReader::FONT_UNDERLINED);
69 break;
73 RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment) : myAlignment(alignment) {
76 void RtfAlignmentCommand::run(RtfReader &reader, int*) const {
77 if (reader.myState.Alignment != myAlignment) {
78 reader.myState.Alignment = myAlignment;
79 reader.setAlignment();
83 RtfCharCommand::RtfCharCommand(const std::string &chr) : myChar(chr) {
86 void RtfCharCommand::run(RtfReader &reader, int*) const {
87 reader.processCharData(myChar.data(), myChar.length(), false);
90 RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination) : myDestination(destination) {
93 void RtfDestinationCommand::run(RtfReader &reader, int*) const {
94 if (reader.myState.Destination == myDestination) {
95 return;
97 reader.myState.Destination = myDestination;
98 if (myDestination == RtfReader::DESTINATION_PICTURE) {
99 reader.myState.ReadDataAsHex = true;
101 reader.switchDestination(myDestination, true);
104 void RtfStyleCommand::run(RtfReader &reader, int*) const {
105 if (reader.myState.Destination == RtfReader::DESTINATION_STYLESHEET) {
106 //std::cerr << "Add style index: " << val << "\n";
108 //sprintf(style_attributes[0], "%i", val);
109 } else /*if (myState.Destination == rdsContent)*/ {
110 //std::cerr << "Set style index: " << val << "\n";
112 //sprintf(style_attributes[0], "%i", val);
116 void RtfCodepageCommand::run(RtfReader &reader, int *parameter) const {
117 if (parameter != 0) {
118 reader.setEncoding(*parameter);
122 void RtfSpecialCommand::run(RtfReader &reader, int*) const {
123 reader.mySpecialMode = true;
126 RtfPictureCommand::RtfPictureCommand(const std::string &mimeType) : myMimeType(mimeType) {
129 void RtfPictureCommand::run(RtfReader &reader, int*) const {
130 reader.myNextImageMimeType = myMimeType;
133 void RtfFontResetCommand::run(RtfReader &reader, int*) const {
134 if (reader.myState.Bold) {
135 reader.myState.Bold = false;
136 reader.setFontProperty(RtfReader::FONT_BOLD);
138 if (reader.myState.Italic) {
139 reader.myState.Italic = false;
140 reader.setFontProperty(RtfReader::FONT_ITALIC);
142 if (reader.myState.Underlined) {
143 reader.myState.Underlined = false;
144 reader.setFontProperty(RtfReader::FONT_UNDERLINED);
148 void RtfReader::addAction(const std::string &tag, RtfCommand *command) {
149 ourKeywordMap.insert(std::pair<std::string,RtfCommand*>(tag, command));
152 void RtfReader::fillKeywordMap() {
153 if (ourKeywordMap.empty()) {
154 addAction("*", new RtfSpecialCommand());
155 addAction("ansicpg", new RtfCodepageCommand());
157 static const char *keywordsToSkip[] = {"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl", "footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep", "ftnsepc", "header", "headerf", "headerl", "headerr", "keywords", "operator", "printim", "private1", "revtim", "rxe", "subject", "tc", "txe", "xe", 0};
158 RtfCommand *skipCommand = new RtfDestinationCommand(RtfReader::DESTINATION_SKIP);
159 for (const char **i = keywordsToSkip; *i != 0; ++i) {
160 addAction(*i, skipCommand);
162 addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO));
163 addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE));
164 addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR));
165 addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE));
166 addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET));
167 addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE));
169 RtfCommand *newParagraphCommand = new RtfNewParagraphCommand();
170 addAction("\n", newParagraphCommand);
171 addAction("\r", newParagraphCommand);
172 addAction("par", newParagraphCommand);
174 addAction("\x09", new RtfCharCommand("\x09"));
175 addAction("_", new RtfCharCommand("-"));
176 addAction("\\", new RtfCharCommand("\\"));
177 addAction("{", new RtfCharCommand("{"));
178 addAction("}", new RtfCharCommand("}"));
179 addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // &bullet;
180 addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // &ndash;
181 addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // &mdash;
182 addAction("~", new RtfCharCommand("\xC0\xA0")); // &nbsp;
183 addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); // &emsp;
184 addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); // &ensp;
185 addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // &lsquo;
186 addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // &rsquo;
187 addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // &ldquo;
188 addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // &rdquo;
190 addAction("jpegblip", new RtfPictureCommand("image/jpeg"));
191 addAction("pngblip", new RtfPictureCommand("image/png"));
193 addAction("s", new RtfStyleCommand());
195 addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER));
196 addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT));
197 addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT));
198 addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY));
199 addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED));
201 addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD));
202 addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC));
203 addAction("u", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED));
204 addAction("plain", new RtfFontResetCommand());
208 bool RtfReader::parseDocument() {
209 enum {
210 READ_NORMAL_DATA,
211 READ_BINARY_DATA,
212 READ_HEX_SYMBOL,
213 READ_KEYWORD,
214 READ_KEYWORD_PARAMETER
215 } parserState = READ_NORMAL_DATA;
217 std::string keyword;
218 std::string parameterString;
219 std::string hexString;
220 int imageStartOffset = -1;
222 while (!myIsInterrupted) {
223 const char *ptr = myStreamBuffer;
224 const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize);
225 if (ptr == end) {
226 break;
228 const char *dataStart = ptr;
229 bool readNextChar = true;
230 while (ptr != end) {
231 switch (parserState) {
232 case READ_BINARY_DATA:
233 // TODO: optimize
234 processCharData(ptr, 1);
235 --myBinaryDataSize;
236 if (myBinaryDataSize == 0) {
237 parserState = READ_NORMAL_DATA;
239 break;
240 case READ_NORMAL_DATA:
241 switch (*ptr) {
242 case '{':
243 if (ptr > dataStart) {
244 processCharData(dataStart, ptr - dataStart);
246 dataStart = ptr + 1;
247 myStateStack.push(myState);
248 myState.ReadDataAsHex = false;
249 break;
250 case '}':
252 if (ptr > dataStart) {
253 processCharData(dataStart, ptr - dataStart);
255 dataStart = ptr + 1;
257 if (imageStartOffset >= 0) {
258 int imageSize = myStream->offset() + (ptr - end) - imageStartOffset;
259 insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize);
260 imageStartOffset = -1;
263 if (myStateStack.empty()) {
264 return false;
267 if (myState.Destination != myStateStack.top().Destination) {
268 switchDestination(myState.Destination, false);
269 switchDestination(myStateStack.top().Destination, true);
272 bool oldItalic = myState.Italic;
273 bool oldBold = myState.Bold;
274 bool oldUnderlined = myState.Underlined;
275 ZLTextAlignmentType oldAlignment = myState.Alignment;
276 myState = myStateStack.top();
277 myStateStack.pop();
279 if (myState.Italic != oldItalic) {
280 setFontProperty(RtfReader::FONT_ITALIC);
282 if (myState.Bold != oldBold) {
283 setFontProperty(RtfReader::FONT_BOLD);
285 if (myState.Underlined != oldUnderlined) {
286 setFontProperty(RtfReader::FONT_UNDERLINED);
288 if (myState.Alignment != oldAlignment) {
289 setAlignment();
292 break;
294 case '\\':
295 if (ptr > dataStart) {
296 processCharData(dataStart, ptr - dataStart);
298 dataStart = ptr + 1;
299 keyword.erase();
300 parserState = READ_KEYWORD;
301 break;
302 case 0x0d:
303 case 0x0a: // cr and lf are noise characters...
304 if (ptr > dataStart) {
305 processCharData(dataStart, ptr - dataStart);
307 dataStart = ptr + 1;
308 break;
309 default:
310 if (myState.ReadDataAsHex) {
311 if (imageStartOffset == -1) {
312 imageStartOffset = myStream->offset() + (ptr - end);
315 break;
317 break;
318 case READ_HEX_SYMBOL:
319 hexString += *ptr;
320 if (hexString.size() == 2) {
321 char ch = strtol(hexString.c_str(), 0, 16);
322 hexString.erase();
323 processCharData(&ch, 1);
324 parserState = READ_NORMAL_DATA;
325 dataStart = ptr + 1;
327 break;
328 case READ_KEYWORD:
329 if (!isalpha(*ptr)) {
330 if ((ptr == dataStart) && (keyword.empty())) {
331 if (*ptr == '\'') {
332 parserState = READ_HEX_SYMBOL;
333 } else {
334 keyword = *ptr;
335 processKeyword(keyword);
336 parserState = READ_NORMAL_DATA;
338 dataStart = ptr + 1;
339 } else {
340 keyword.append(dataStart, ptr - dataStart);
341 if ((*ptr == '-') || isdigit(*ptr)) {
342 dataStart = ptr;
343 parserState = READ_KEYWORD_PARAMETER;
344 } else {
345 readNextChar = *ptr == ' ';
346 processKeyword(keyword);
347 parserState = READ_NORMAL_DATA;
348 dataStart = readNextChar ? ptr + 1 : ptr;
352 break;
353 case READ_KEYWORD_PARAMETER:
354 if (!isdigit(*ptr)) {
355 parameterString.append(dataStart, ptr - dataStart);
356 int parameter = atoi(parameterString.c_str());
357 parameterString.erase();
358 readNextChar = *ptr == ' ';
359 if ((keyword == "bin") && (parameter > 0)) {
360 myBinaryDataSize = parameter;
361 parserState = READ_BINARY_DATA;
362 } else {
363 processKeyword(keyword, &parameter);
364 parserState = READ_NORMAL_DATA;
366 dataStart = readNextChar ? ptr + 1 : ptr;
368 break;
370 if (readNextChar) {
371 ++ptr;
372 } else {
373 readNextChar = true;
376 if (dataStart < end) {
377 switch (parserState) {
378 case READ_NORMAL_DATA:
379 processCharData(dataStart, end - dataStart);
380 case READ_KEYWORD:
381 keyword.append(dataStart, end - dataStart);
382 break;
383 case READ_KEYWORD_PARAMETER:
384 parameterString.append(dataStart, end - dataStart);
385 break;
386 default:
387 break;
392 return myIsInterrupted || myStateStack.empty();
395 void RtfReader::processKeyword(const std::string &keyword, int *parameter) {
396 bool wasSpecialMode = mySpecialMode;
397 mySpecialMode = false;
398 if (myState.Destination == RtfReader::DESTINATION_SKIP) {
399 return;
402 std::map<std::string, RtfCommand*>::const_iterator it = ourKeywordMap.find(keyword);
404 if (it == ourKeywordMap.end()) {
405 if (wasSpecialMode)
406 myState.Destination = RtfReader::DESTINATION_SKIP;
407 return;
410 it->second->run(*this, parameter);
414 void RtfReader::processCharData(const char *data, size_t len, bool convert) {
415 if (myState.Destination != RtfReader::DESTINATION_SKIP) {
416 addCharData(data, len, convert);
420 void RtfReader::interrupt() {
421 myIsInterrupted = true;
424 bool RtfReader::readDocument(const std::string &fileName) {
425 myFileName = fileName;
426 myStream = ZLFile(fileName).inputStream();
427 if (myStream.isNull() || !myStream->open()) {
428 return false;
431 fillKeywordMap();
433 myStreamBuffer = new char[rtfStreamBufferSize];
435 myIsInterrupted = false;
437 mySpecialMode = false;
439 myState.Alignment = ALIGN_UNDEFINED;
440 myState.Italic = false;
441 myState.Bold = false;
442 myState.Underlined = false;
443 myState.Destination = RtfReader::DESTINATION_NONE;
444 myState.ReadDataAsHex = false;
446 bool code = parseDocument();
448 while (!myStateStack.empty()) {
449 myStateStack.pop();
452 delete[] myStreamBuffer;
453 myStream->close();
455 return code;