vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / translators / rtf / RTF.cpp
blobea4663ce554fc3bb66800236af3c0c931968ebd6
1 /*
2 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
7 #include "RTF.h"
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
14 #include <DataIO.h>
// Uncomment the following line to print parser tracing via printf().
//#define TRACE_RTF
#ifndef TRACE_RTF
	// tracing disabled: TRACE() collapses to an empty statement
#	define TRACE(x...) ;
#else
#	define TRACE(x...) printf(x)
#endif
// Names of the control words looked up by Group::DetermineDestination().
// The table is searched with bsearch(), so it has to stay sorted in
// strcmp() order.
static const char *kDestinationControlWords[] = {
	"aftncn", "aftnsep", "aftnsepc", "annotation",
	"atnauthor", "atndate", "atnicn", "atnid", "atnparent", "atnref",
	"atntime", "atrfend", "atrfstart", "author",
	"background", "bkmkend", "buptim",
	"colortbl", "comment", "creatim",
	"do", "doccomm", "docvar",
	"fonttbl", "footer", "footerf", "footerl", "footerr", "footnote",
	"ftncn", "ftnsep", "ftnsepc",
	"header", "headerf", "headerl", "headerr",
	"info",
	"keywords",
	"operator",
	"pict", "printim", "private1",
	"revtim", "rxe",
	"stylesheet", "subject",
	"tc", "title", "txe",
	"xe",
};
36 static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t);
37 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10) throw (status_t);
40 using namespace RTF;
43 static char
44 read_char(BDataIO &stream, bool endOfFileAllowed) throw (status_t)
46 char c;
47 ssize_t bytesRead = stream.Read(&c, 1);
49 if (bytesRead < B_OK)
50 throw (status_t)bytesRead;
52 if (bytesRead == 0 && !endOfFileAllowed)
53 throw (status_t)B_ERROR;
55 return c;
59 static int32
60 parse_integer(char first, BDataIO &stream, char &_last, int32 base)
61 throw (status_t)
63 const char *kDigits = "0123456789abcdef";
64 int32 integer = 0;
65 int32 count = 0;
67 char digit = first;
69 if (digit == '\0')
70 digit = read_char(stream);
72 while (true) {
73 int32 pos = 0;
74 for (; pos < base; pos++) {
75 if (kDigits[pos] == tolower(digit)) {
76 integer = integer * base + pos;
77 count++;
78 break;
81 if (pos == base) {
82 _last = digit;
83 goto out;
86 digit = read_char(stream);
89 out:
90 if (count == 0)
91 throw (status_t)B_BAD_TYPE;
93 return integer;
/*!	bsearch() comparison hook: compares \a key with the string that the
	array slot \a array points to.
*/
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
104 static void
105 dump(Element &element, int32 level = 0)
107 printf("%03" B_PRId32 " (%p):", level, &element);
108 for (int32 i = 0; i < level; i++)
109 printf(" ");
111 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
112 printf("<RTF header, major version %" B_PRId32 ">\n", header->Version());
113 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
114 printf("<Command: %s", command->Name());
115 if (command->HasOption())
116 printf(", Option %" B_PRId32, command->Option());
117 puts(">");
118 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
119 printf("<Text>");
120 puts(text->String());
121 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
122 printf("<Group \"%s\">\n", group->Name());
124 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
125 for (uint32 i = 0; i < group->CountElements(); i++)
126 dump(*group->ElementAt(i), level + 1);
131 // #pragma mark -
134 Parser::Parser(BPositionIO &stream)
136 fStream(&stream, 65536, false),
137 fIdentified(false)
142 status_t
143 Parser::Identify()
145 char header[5];
146 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
147 return B_IO_ERROR;
149 if (strncmp(header, "{\\rtf", 5))
150 return B_BAD_TYPE;
152 fIdentified = true;
153 return B_OK;
157 status_t
158 Parser::Parse(Header &header)
160 if (!fIdentified && Identify() != B_OK)
161 return B_BAD_TYPE;
163 try {
164 int32 openBrackets = 1;
166 // since we already preparsed parts of the RTF header, the header
167 // is handled here directly
168 char last;
169 header.Parse('\0', fStream, last);
171 Group *parent = &header;
172 char c = last;
174 while (true) {
175 Element *element = NULL;
177 // we'll just ignore the end of the stream
178 if (parent == NULL)
179 return B_OK;
181 switch (c) {
182 case '{':
183 openBrackets++;
184 parent->AddElement(element = new Group());
185 parent = static_cast<Group *>(element);
186 break;
188 case '\\':
189 parent->AddElement(element = new Command());
190 break;
192 case '}':
193 openBrackets--;
194 parent->DetermineDestination();
195 parent = parent->Parent();
196 // supposed to fall through
197 case '\n':
198 case '\r':
200 ssize_t bytesRead = fStream.Read(&c, 1);
201 if (bytesRead < B_OK)
202 throw (status_t)bytesRead;
203 else if (bytesRead != 1) {
204 // this is the only valid exit status
205 if (openBrackets == 0)
206 return B_OK;
208 throw (status_t)B_ERROR;
210 continue;
213 default:
214 parent->AddElement(element = new Text());
215 break;
218 if (element == NULL)
219 throw (status_t)B_ERROR;
221 element->Parse(c, fStream, last);
222 c = last;
224 } catch (status_t status) {
225 return status;
228 return B_OK;
232 // #pragma mark -
235 Element::Element()
237 fParent(NULL)
242 Element::~Element()
247 void
248 Element::SetParent(Group *parent)
250 fParent = parent;
254 Group *
255 Element::Parent() const
257 return fParent;
261 bool
262 Element::IsDefinitionDelimiter()
264 return false;
268 void
269 Element::PrintToStream(int32 level)
271 dump(*this, level);
275 // #pragma mark -
278 Group::Group()
280 fDestination(TEXT_DESTINATION)
285 Group::~Group()
287 Element *element;
288 while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) {
289 delete element;
294 void
295 Group::Parse(char first, BDataIO &stream, char &last) throw (status_t)
297 if (first == '\0')
298 first = read_char(stream);
300 if (first != '{')
301 throw (status_t)B_BAD_TYPE;
303 last = read_char(stream);
307 status_t
308 Group::AddElement(Element *element)
310 if (element == NULL)
311 return B_BAD_VALUE;
313 if (fElements.AddItem(element)) {
314 element->SetParent(this);
315 return B_OK;
318 return B_NO_MEMORY;
322 uint32
323 Group::CountElements() const
325 return (uint32)fElements.CountItems();
329 Element *
330 Group::ElementAt(uint32 index) const
332 return static_cast<Element *>(fElements.ItemAt(index));
336 Element *
337 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
339 if (index < 0)
340 return NULL;
342 Element *element;
343 int32 number = 0;
344 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
345 if (number == index) {
346 if (_startIndex)
347 *_startIndex = i;
348 return element;
351 if (element->IsDefinitionDelimiter())
352 number++;
355 return NULL;
359 Command *
360 Group::FindDefinition(const char *name, int32 index) const
362 int32 startIndex;
363 Element *element = FindDefinitionStart(index, &startIndex);
364 if (element == NULL)
365 return NULL;
367 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
368 if (element->IsDefinitionDelimiter())
369 break;
371 if (Command *command = dynamic_cast<Command *>(element)) {
372 if (command != NULL && !strcmp(name, command->Name()))
373 return command;
377 return NULL;
381 Group *
382 Group::FindGroup(const char *name) const
384 Element *element;
385 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
386 Group *group = dynamic_cast<Group *>(element);
387 if (group == NULL)
388 continue;
390 Command *command = dynamic_cast<Command *>(group->ElementAt(0));
391 if (command != NULL && !strcmp(name, command->Name()))
392 return group;
395 return NULL;
399 const char *
400 Group::Name() const
402 Command *command = dynamic_cast<Command *>(ElementAt(0));
403 if (command != NULL)
404 return command->Name();
406 return NULL;
410 void
411 Group::DetermineDestination()
413 const char *name = Name();
414 if (name == NULL)
415 return;
417 if (!strcmp(name, "*")) {
418 fDestination = COMMENT_DESTINATION;
419 return;
422 // binary search for destination control words
424 if (bsearch(name, kDestinationControlWords,
425 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
426 sizeof(kDestinationControlWords[0]),
427 (int (*)(const void *, const void *))string_array_compare) != NULL)
428 fDestination = OTHER_DESTINATION;
432 group_destination
433 Group::Destination() const
435 return fDestination;
439 // #pragma mark -
442 Header::Header()
444 fVersion(0)
449 Header::~Header()
454 void
455 Header::Parse(char first, BDataIO &stream, char &last) throw (status_t)
457 // The stream has been peeked into by the parser already, and
458 // only the version follows in the stream -- let's pick it up
460 fVersion = parse_integer(first, stream, last);
462 // recreate "rtf" command to name this group
464 Command *command = new Command();
465 command->SetName("rtf");
466 command->SetOption(fVersion);
468 AddElement(command);
472 int32
473 Header::Version() const
475 return fVersion;
479 const char *
480 Header::Charset() const
482 Command *command = dynamic_cast<Command *>(ElementAt(1));
483 if (command == NULL)
484 return NULL;
486 return command->Name();
490 rgb_color
491 Header::Color(int32 index)
493 rgb_color color = {0, 0, 0, 255};
495 Group *colorTable = FindGroup("colortbl");
497 if (colorTable != NULL) {
498 if (Command *gun = colorTable->FindDefinition("red", index))
499 color.red = gun->Option();
500 if (Command *gun = colorTable->FindDefinition("green", index))
501 color.green = gun->Option();
502 if (Command *gun = colorTable->FindDefinition("blue", index))
503 color.blue = gun->Option();
506 return color;
510 // #pragma mark -
513 Text::Text()
518 Text::~Text()
520 SetTo(NULL);
524 bool
525 Text::IsDefinitionDelimiter()
527 return fText == ";";
531 void
532 Text::Parse(char first, BDataIO &stream, char &last) throw (status_t)
534 char c = first;
535 if (c == '\0')
536 c = read_char(stream);
538 if (c == ';') {
539 // definition delimiter
540 fText.SetTo(";");
541 last = read_char(stream);
542 return;
545 const size_t kBufferSteps = 1;
546 size_t maxSize = kBufferSteps;
547 char *text = fText.LockBuffer(maxSize);
548 if (text == NULL)
549 throw (status_t)B_NO_MEMORY;
551 size_t position = 0;
553 while (true) {
554 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
555 break;
557 if (position >= maxSize) {
558 fText.UnlockBuffer(position);
559 text = fText.LockBuffer(maxSize += kBufferSteps);
560 if (text == NULL)
561 throw (status_t)B_NO_MEMORY;
564 text[position++] = c;
566 c = read_char(stream);
568 fText.UnlockBuffer(position);
570 // ToDo: add support for different charsets - right now, only ASCII is supported!
571 // To achieve this, we should just translate everything into UTF-8 here
573 last = c;
577 status_t
578 Text::SetTo(const char *text)
580 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
584 const char *
585 Text::String() const
587 return fText.String();
591 uint32
592 Text::Length() const
594 return fText.Length();
598 // #pragma mark -
601 Command::Command()
603 fName(NULL),
604 fHasOption(false),
605 fOption(-1)
610 Command::~Command()
615 void
616 Command::Parse(char first, BDataIO &stream, char &last) throw (status_t)
618 if (first == '\0')
619 first = read_char(stream);
621 if (first != '\\')
622 throw (status_t)B_BAD_TYPE;
624 // get name
625 char name[kCommandLength];
626 size_t length = 0;
627 char c;
628 while (isalpha(c = read_char(stream))) {
629 name[length++] = c;
630 if (length >= kCommandLength - 1)
631 throw (status_t)B_BAD_TYPE;
634 if (length == 0) {
635 if (c == '\n' || c == '\r') {
636 // we're a hard return
637 fName.SetTo("par");
638 } else
639 fName.SetTo(c, 1);
641 // read over character
642 c = read_char(stream);
643 } else
644 fName.SetTo(name, length);
646 TRACE("command: %s\n", fName.String());
648 // parse numeric option
650 if (c == '-')
651 c = read_char(stream);
653 last = c;
655 if (fName == "'") {
656 // hexadecimal
657 char bytes[2];
658 bytes[0] = read_char(stream);
659 bytes[1] = '\0';
660 BMemoryIO memory(bytes, 2);
662 SetOption(parse_integer(c, memory, last, 16));
663 last = read_char(stream);
664 } else {
665 // decimal
666 if (isdigit(c))
667 SetOption(parse_integer(c, stream, last));
669 // a space delimiter is eaten up by the command
670 if (isspace(last))
671 last = read_char(stream);
674 if (HasOption())
675 TRACE(" option: %ld\n", fOption);
679 status_t
680 Command::SetName(const char *name)
682 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
686 const char *
687 Command::Name()
689 return fName.String();
693 void
694 Command::UnsetOption()
696 fHasOption = false;
697 fOption = -1;
701 void
702 Command::SetOption(int32 option)
704 fOption = option;
705 fHasOption = true;
709 bool
710 Command::HasOption() const
712 return fHasOption;
716 int32
717 Command::Option() const
719 return fOption;
723 // #pragma mark -
726 Iterator::Iterator(Element &start, group_destination destination)
728 SetTo(start, destination);
732 void
733 Iterator::SetTo(Element &start, group_destination destination)
735 fStart = &start;
736 fDestination = destination;
738 Rewind();
742 void
743 Iterator::Rewind()
745 fStack.MakeEmpty();
746 fStack.Push(fStart);
750 bool
751 Iterator::HasNext() const
753 return !fStack.IsEmpty();
757 Element *
758 Iterator::Next()
760 Element *element;
762 if (!fStack.Pop(&element))
763 return NULL;
765 Group *group = dynamic_cast<Group *>(element);
766 if (group != NULL
767 && (fDestination == ALL_DESTINATIONS
768 || fDestination == group->Destination())) {
769 // put this group's children on the stack in
770 // reverse order, so that we iterate over
771 // the tree in in-order
773 for (int32 i = group->CountElements(); i-- > 0;) {
774 fStack.Push(group->ElementAt(i));
778 return element;
782 // #pragma mark -
785 Worker::Worker(RTF::Header &start)
787 fStart(start)
792 Worker::~Worker()
797 void
798 Worker::Dispatch(Element *element)
800 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
801 fSkip = false;
802 Group(group);
804 if (fSkip)
805 return;
807 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
808 Dispatch(element);
810 GroupEnd(group);
811 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
812 Command(command);
813 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
814 Text(text);
819 void
820 Worker::Work() throw (status_t)
822 Dispatch(&fStart);
826 void
827 Worker::Group(RTF::Group *group)
832 void
833 Worker::GroupEnd(RTF::Group *group)
838 void
839 Worker::Command(RTF::Command *command)
844 void
845 Worker::Text(RTF::Text *text)
850 RTF::Header &
851 Worker::Start()
853 return fStart;
857 void
858 Worker::Skip()
860 fSkip = true;
864 void
865 Worker::Abort(status_t status)
867 throw status;