Add initial bits for Qt6 support
[carla.git] / source / modules / water / xml / XmlDocument.cpp
blob08b0be686d66355cac908fb90f7ac25a14d7895f
1 /*
2 ==============================================================================
4 This file is part of the Water library.
5 Copyright (c) 2016 ROLI Ltd.
6 Copyright (C) 2017-2022 Filipe Coelho <falktx@falktx.com>
8 Permission is granted to use this software under the terms of the ISC license
9 http://www.isc.org/downloads/software-support-policy/isc-license/
11 Permission to use, copy, modify, and/or distribute this software for any
12 purpose with or without fee is hereby granted, provided that the above
13 copyright notice and this permission notice appear in all copies.
15 THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
16 TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
18 OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
19 USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
20 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
21 OF THIS SOFTWARE.
23 ==============================================================================
26 #include "XmlDocument.h"
27 #include "XmlElement.h"
28 #include "../containers/LinkedListPointer.h"
29 #include "../streams/FileInputSource.h"
30 #include "../streams/InputStream.h"
31 #include "../streams/MemoryOutputStream.h"
33 namespace water {
35 XmlDocument::XmlDocument (const String& documentText)
36 : originalText (documentText),
37 input (nullptr),
38 outOfData (false),
39 errorOccurred (false),
40 needToLoadDTD (false),
41 ignoreEmptyTextElements (true)
45 XmlDocument::XmlDocument (const File& file)
46 : input (nullptr),
47 outOfData (false),
48 errorOccurred (false),
49 needToLoadDTD (false),
50 ignoreEmptyTextElements (true),
51 inputSource (new FileInputSource (file))
55 XmlDocument::~XmlDocument()
59 XmlElement* XmlDocument::parse (const File& file)
61 XmlDocument doc (file);
62 return doc.getDocumentElement();
65 XmlElement* XmlDocument::parse (const String& xmlData)
67 XmlDocument doc (xmlData);
68 return doc.getDocumentElement();
71 void XmlDocument::setInputSource (FileInputSource* const newSource) noexcept
73 inputSource = newSource;
76 void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
78 ignoreEmptyTextElements = shouldBeIgnored;
81 namespace XmlIdentifierChars
83 static bool isIdentifierCharSlow (const water_uchar c) noexcept
85 return CharacterFunctions::isLetterOrDigit (c)
86 || c == '_' || c == '-' || c == ':' || c == '.';
89 static bool isIdentifierChar (const water_uchar c) noexcept
91 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
93 return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
94 : isIdentifierCharSlow (c);
97 /*static void generateIdentifierCharConstants()
99 uint32 n[8] = { 0 };
100 for (int i = 0; i < 256; ++i)
101 if (isIdentifierCharSlow (i))
102 n[i >> 5] |= (1 << (i & 31));
104 String s;
105 for (int i = 0; i < 8; ++i)
106 s << "0x" << String::toHexString ((int) n[i]) << ", ";
108 DBG (s);
111 static CharPointer_UTF8 findEndOfToken (CharPointer_UTF8 p)
113 while (isIdentifierChar (*p))
114 ++p;
116 return p;
120 XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
122 if (originalText.isEmpty() && inputSource != nullptr)
124 CarlaScopedPointer<InputStream> in (inputSource->createInputStream());
126 if (in != nullptr)
128 MemoryOutputStream data;
129 data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
131 if (data.getDataSize() > 2)
133 data.writeByte (0);
134 const char* text = static_cast<const char*> (data.getData());
136 if (CharPointer_UTF8::isByteOrderMark (text))
137 text += 3;
139 // parse the input buffer directly to avoid copying it all to a string..
140 return parseDocumentElement (CharPointer_UTF8 (text), onlyReadOuterDocumentElement);
145 return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
148 const String& XmlDocument::getLastParseError() const noexcept
150 return lastError;
153 void XmlDocument::setLastError (const String& desc, const bool carryOn)
155 lastError = desc;
156 errorOccurred = ! carryOn;
159 String XmlDocument::getFileContents (const String& filename) const
161 if (inputSource != nullptr)
163 const CarlaScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
165 if (in != nullptr)
166 return in->readEntireStreamAsString();
169 return String();
172 water_uchar XmlDocument::readNextChar() noexcept
174 const water_uchar c = input.getAndAdvance();
176 if (c == 0)
178 outOfData = true;
179 --input;
182 return c;
185 XmlElement* XmlDocument::parseDocumentElement (CharPointer_UTF8 textToParse,
186 const bool onlyReadOuterDocumentElement)
188 input = textToParse;
189 errorOccurred = false;
190 outOfData = false;
191 needToLoadDTD = true;
193 if (textToParse.isEmpty())
195 lastError = "not enough input";
197 else if (! parseHeader())
199 lastError = "malformed header";
201 else if (! parseDTD())
203 lastError = "malformed DTD";
205 else
207 lastError.clear();
209 CarlaScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
211 if (! errorOccurred)
212 return result.release();
215 return nullptr;
218 bool XmlDocument::parseHeader()
220 skipNextWhiteSpace();
222 if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<?xml"), 5) == 0)
224 const CharPointer_UTF8 headerEnd (CharacterFunctions::find (input, CharPointer_UTF8 ("?>")));
226 if (headerEnd.isEmpty())
227 return false;
229 const String encoding (String (input, headerEnd)
230 .fromFirstOccurrenceOf ("encoding", false, true)
231 .fromFirstOccurrenceOf ("=", false, false)
232 .fromFirstOccurrenceOf ("\"", false, false)
233 .upToFirstOccurrenceOf ("\"", false, false).trim());
235 /* If you load an XML document with a non-UTF encoding type, it may have been
236 loaded wrongly.. Since all the files are read via the normal water file streams,
237 they're treated as UTF-8, so by the time it gets to the parser, the encoding will
238 have been lost. Best plan is to stick to utf-8 or if you have specific files to
239 read, use your own code to convert them to a unicode String, and pass that to the
240 XML parser.
242 CARLA_SAFE_ASSERT_RETURN (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"), false);
244 input = headerEnd + 2;
245 skipNextWhiteSpace();
248 return true;
251 bool XmlDocument::parseDTD()
253 if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<!DOCTYPE"), 9) == 0)
255 input += 9;
256 const CharPointer_UTF8 dtdStart (input);
258 for (int n = 1; n > 0;)
260 const water_uchar c = readNextChar();
262 if (outOfData)
263 return false;
265 if (c == '<')
266 ++n;
267 else if (c == '>')
268 --n;
271 dtdText = String (dtdStart, input - 1).trim();
274 return true;
277 void XmlDocument::skipNextWhiteSpace()
279 for (;;)
281 input = input.findEndOfWhitespace();
283 if (input.isEmpty())
285 outOfData = true;
286 break;
289 if (*input == '<')
291 if (input[1] == '!'
292 && input[2] == '-'
293 && input[3] == '-')
295 input += 4;
296 const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
298 if (closeComment < 0)
300 outOfData = true;
301 break;
304 input += closeComment + 3;
305 continue;
308 if (input[1] == '?')
310 input += 2;
311 const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
313 if (closeBracket < 0)
315 outOfData = true;
316 break;
319 input += closeBracket + 2;
320 continue;
324 break;
328 void XmlDocument::readQuotedString (String& result)
330 const water_uchar quote = readNextChar();
332 while (! outOfData)
334 const water_uchar c = readNextChar();
336 if (c == quote)
337 break;
339 --input;
341 if (c == '&')
343 readEntity (result);
345 else
347 const CharPointer_UTF8 start (input);
349 for (;;)
351 const water_uchar character = *input;
353 if (character == quote)
355 result.appendCharPointer (start, input);
356 ++input;
357 return;
359 else if (character == '&')
361 result.appendCharPointer (start, input);
362 break;
364 else if (character == 0)
366 setLastError ("unmatched quotes", false);
367 outOfData = true;
368 break;
371 ++input;
377 XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
379 XmlElement* node = nullptr;
381 skipNextWhiteSpace();
382 if (outOfData)
383 return nullptr;
385 if (*input == '<')
387 ++input;
388 CharPointer_UTF8 endOfToken (XmlIdentifierChars::findEndOfToken (input));
390 if (endOfToken == input)
392 // no tag name - but allow for a gap after the '<' before giving an error
393 skipNextWhiteSpace();
394 endOfToken = XmlIdentifierChars::findEndOfToken (input);
396 if (endOfToken == input)
398 setLastError ("tag name missing", false);
399 return node;
403 node = new XmlElement (input, endOfToken);
404 input = endOfToken;
405 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
407 // look for attributes
408 for (;;)
410 skipNextWhiteSpace();
412 const water_uchar c = *input;
414 // empty tag..
415 if (c == '/' && input[1] == '>')
417 input += 2;
418 break;
421 // parse the guts of the element..
422 if (c == '>')
424 ++input;
426 if (alsoParseSubElements)
427 readChildElements (*node);
429 break;
432 // get an attribute..
433 if (XmlIdentifierChars::isIdentifierChar (c))
435 CharPointer_UTF8 attNameEnd (XmlIdentifierChars::findEndOfToken (input));
437 if (attNameEnd != input)
439 const CharPointer_UTF8 attNameStart (input);
440 input = attNameEnd;
442 skipNextWhiteSpace();
444 if (readNextChar() == '=')
446 skipNextWhiteSpace();
448 const water_uchar nextChar = *input;
450 if (nextChar == '"' || nextChar == '\'')
452 XmlElement::XmlAttributeNode* const newAtt
453 = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
455 readQuotedString (newAtt->value);
456 attributeAppender.append (newAtt);
457 continue;
460 else
462 setLastError ("expected '=' after attribute '"
463 + String (attNameStart, attNameEnd) + "'", false);
464 return node;
468 else
470 if (! outOfData)
471 setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
474 break;
478 return node;
481 void XmlDocument::readChildElements (XmlElement& parent)
483 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
485 for (;;)
487 const CharPointer_UTF8 preWhitespaceInput (input);
488 skipNextWhiteSpace();
490 if (outOfData)
492 setLastError ("unmatched tags", false);
493 break;
496 if (*input == '<')
498 const water_uchar c1 = input[1];
500 if (c1 == '/')
502 // our close tag..
503 const int closeTag = input.indexOf ((water_uchar) '>');
505 if (closeTag >= 0)
506 input += closeTag + 1;
508 break;
511 if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_UTF8 ("[CDATA["), 7) == 0)
513 input += 9;
514 const CharPointer_UTF8 inputStart (input);
516 for (;;)
518 const water_uchar c0 = *input;
520 if (c0 == 0)
522 setLastError ("unterminated CDATA section", false);
523 outOfData = true;
524 break;
526 else if (c0 == ']'
527 && input[1] == ']'
528 && input[2] == '>')
530 childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
531 input += 3;
532 break;
535 ++input;
538 else
540 // this is some other element, so parse and add it..
541 if (XmlElement* const n = readNextElement (true))
542 childAppender.append (n);
543 else
544 break;
547 else // must be a character block
549 input = preWhitespaceInput; // roll back to include the leading whitespace
550 MemoryOutputStream textElementContent;
551 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
553 for (;;)
555 const water_uchar c = *input;
557 if (c == '<')
559 if (input[1] == '!' && input[2] == '-' && input[3] == '-')
561 input += 4;
562 const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
564 if (closeComment < 0)
566 setLastError ("unterminated comment", false);
567 outOfData = true;
568 return;
571 input += closeComment + 3;
572 continue;
575 break;
578 if (c == 0)
580 setLastError ("unmatched tags", false);
581 outOfData = true;
582 return;
585 if (c == '&')
587 String entity;
588 readEntity (entity);
590 if (entity.startsWithChar ('<') && entity [1] != 0)
592 const CharPointer_UTF8 oldInput (input);
593 const bool oldOutOfData = outOfData;
595 input = entity.getCharPointer();
596 outOfData = false;
598 while (XmlElement* n = readNextElement (true))
599 childAppender.append (n);
601 input = oldInput;
602 outOfData = oldOutOfData;
604 else
606 textElementContent << entity;
607 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
610 else
612 for (;; ++input)
614 water_uchar nextChar = *input;
616 if (nextChar == '\r')
618 nextChar = '\n';
620 if (input[1] == '\n')
621 continue;
624 if (nextChar == '<' || nextChar == '&')
625 break;
627 if (nextChar == 0)
629 setLastError ("unmatched tags", false);
630 outOfData = true;
631 return;
634 textElementContent.appendUTF8Char (nextChar);
635 contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
640 if (contentShouldBeUsed)
641 childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
646 void XmlDocument::readEntity (String& result)
648 // skip over the ampersand
649 ++input;
651 if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
653 input += 4;
654 result += '&';
656 else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
658 input += 5;
659 result += '"';
661 else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
663 input += 5;
664 result += '\'';
666 else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
668 input += 3;
669 result += '<';
671 else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
673 input += 3;
674 result += '>';
676 else if (*input == '#')
678 int charCode = 0;
679 ++input;
681 if (*input == 'x' || *input == 'X')
683 ++input;
684 int numChars = 0;
686 while (input[0] != ';')
688 const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
690 if (hexValue < 0 || ++numChars > 8)
692 setLastError ("illegal escape sequence", true);
693 break;
696 charCode = (charCode << 4) | hexValue;
697 ++input;
700 ++input;
702 else if (input[0] >= '0' && input[0] <= '9')
704 int numChars = 0;
706 while (input[0] != ';')
708 if (++numChars > 12)
710 setLastError ("illegal escape sequence", true);
711 break;
714 charCode = charCode * 10 + ((int) input[0] - '0');
715 ++input;
718 ++input;
720 else
722 setLastError ("illegal escape sequence", true);
723 result += '&';
724 return;
727 result << (water_uchar) charCode;
729 else
731 const CharPointer_UTF8 entityNameStart (input);
732 const int closingSemiColon = input.indexOf ((water_uchar) ';');
734 if (closingSemiColon < 0)
736 outOfData = true;
737 result += '&';
739 else
741 input += closingSemiColon + 1;
743 result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
748 String XmlDocument::expandEntity (const String& ent)
750 if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
751 if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
752 if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
753 if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
754 if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
756 if (ent[0] == '#')
758 const water_uchar char1 = ent[1];
760 if (char1 == 'x' || char1 == 'X')
761 return String::charToString (static_cast<water_uchar> (ent.substring (2).getHexValue32()));
763 if (char1 >= '0' && char1 <= '9')
764 return String::charToString (static_cast<water_uchar> (ent.substring (1).getIntValue()));
766 setLastError ("illegal escape sequence", false);
767 return String::charToString ('&');
770 return expandExternalEntity (ent);
773 String XmlDocument::expandExternalEntity (const String& entity)
775 if (needToLoadDTD)
777 if (dtdText.isNotEmpty())
779 dtdText = dtdText.trimCharactersAtEnd (">");
780 tokenisedDTD.addTokens (dtdText, true);
782 if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
783 && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
785 const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
787 tokenisedDTD.clear();
788 tokenisedDTD.addTokens (getFileContents (fn), true);
790 else
792 tokenisedDTD.clear();
793 const int openBracket = dtdText.indexOfChar ('[');
795 if (openBracket > 0)
797 const int closeBracket = dtdText.lastIndexOfChar (']');
799 if (closeBracket > openBracket)
800 tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
801 closeBracket), true);
805 for (int i = tokenisedDTD.size(); --i >= 0;)
807 if (tokenisedDTD[i].startsWithChar ('%')
808 && tokenisedDTD[i].endsWithChar (';'))
810 const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
811 StringArray newToks;
812 newToks.addTokens (parsed, true);
814 tokenisedDTD.remove (i);
816 for (int j = newToks.size(); --j >= 0;)
817 tokenisedDTD.insert (i, newToks[j]);
822 needToLoadDTD = false;
825 for (int i = 0; i < tokenisedDTD.size(); ++i)
827 if (tokenisedDTD[i] == entity)
829 if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
831 String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
833 // check for sub-entities..
834 int ampersand = ent.indexOfChar ('&');
836 while (ampersand >= 0)
838 const int semiColon = ent.indexOf (i + 1, ";");
840 if (semiColon < 0)
842 setLastError ("entity without terminating semi-colon", false);
843 break;
846 const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
848 ent = ent.substring (0, ampersand)
849 + resolved
850 + ent.substring (semiColon + 1);
852 ampersand = ent.indexOfChar (semiColon + 1, '&');
855 return ent;
860 setLastError ("unknown entity", true);
862 return entity;
865 String XmlDocument::getParameterEntity (const String& entity)
867 for (int i = 0; i < tokenisedDTD.size(); ++i)
869 if (tokenisedDTD[i] == entity
870 && tokenisedDTD [i - 1] == "%"
871 && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
873 const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
875 if (ent.equalsIgnoreCase ("system"))
876 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
878 return ent.trim().unquoted();
882 return entity;