VST3: fetch midi mappings all at once, use it for note/sound-off
[carla.git] / source / modules / juce_core / xml / juce_XmlDocument.cpp
blob075f94760c6e2f5d4521aa40ff3e1a78bf9cc287
1 /*
2 ==============================================================================
4 This file is part of the JUCE library.
5 Copyright (c) 2022 - Raw Material Software Limited
7 JUCE is an open source library subject to commercial or open-source
8 licensing.
10 The code included in this file is provided under the terms of the ISC license
11 http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12 To use, copy, modify, and/or distribute this software for any purpose with or
13 without fee is hereby granted provided that the above copyright notice and
14 this permission notice appear in all copies.
16 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18 DISCLAIMED.
20 ==============================================================================
23 namespace juce
26 XmlDocument::XmlDocument (const String& text) : originalText (text) {}
27 XmlDocument::XmlDocument (const File& file) : inputSource (new FileInputSource (file)) {}
29 XmlDocument::~XmlDocument() {}
31 std::unique_ptr<XmlElement> XmlDocument::parse (const File& file)
33 return XmlDocument (file).getDocumentElement();
36 std::unique_ptr<XmlElement> XmlDocument::parse (const String& textToParse)
38 return XmlDocument (textToParse).getDocumentElement();
41 std::unique_ptr<XmlElement> parseXML (const String& textToParse)
43 return XmlDocument (textToParse).getDocumentElement();
46 std::unique_ptr<XmlElement> parseXML (const File& file)
48 return XmlDocument (file).getDocumentElement();
51 std::unique_ptr<XmlElement> parseXMLIfTagMatches (const String& textToParse, StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
56 std::unique_ptr<XmlElement> parseXMLIfTagMatches (const File& file, StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
61 void XmlDocument::setInputSource (InputSource* newSource) noexcept
63 inputSource.reset (newSource);
66 void XmlDocument::setEmptyTextElementsIgnored (bool shouldBeIgnored) noexcept
68 ignoreEmptyTextElements = shouldBeIgnored;
71 namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (juce_wchar c) noexcept
75 return CharacterFunctions::isLetterOrDigit (c)
76 || c == '_' || c == '-' || c == ':' || c == '.';
79 static bool isIdentifierChar (juce_wchar c) noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
83 return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
87 /*static void generateIdentifierCharConstants()
89 uint32 n[8] = { 0 };
90 for (int i = 0; i < 256; ++i)
91 if (isIdentifierCharSlow (i))
92 n[i >> 5] |= (1 << (i & 31));
94 String s;
95 for (int i = 0; i < 8; ++i)
96 s << "0x" << String::toHexString ((int) n[i]) << ", ";
98 DBG (s);
99 }*/
101 static String::CharPointerType findEndOfToken (String::CharPointerType p) noexcept
103 while (isIdentifierChar (*p))
104 ++p;
106 return p;
110 std::unique_ptr<XmlElement> XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
112 if (originalText.isEmpty() && inputSource != nullptr)
114 std::unique_ptr<InputStream> in (inputSource->createInputStream());
116 if (in != nullptr)
118 MemoryOutputStream data;
119 data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
121 #if JUCE_STRING_UTF_TYPE == 8
122 if (data.getDataSize() > 2)
124 data.writeByte (0);
125 auto* text = static_cast<const char*> (data.getData());
127 if (CharPointer_UTF16::isByteOrderMarkBigEndian (text)
128 || CharPointer_UTF16::isByteOrderMarkLittleEndian (text))
130 originalText = data.toString();
132 else
134 if (CharPointer_UTF8::isByteOrderMark (text))
135 text += 3;
137 // parse the input buffer directly to avoid copying it all to a string..
138 return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
141 #else
142 originalText = data.toString();
143 #endif
147 return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
150 std::unique_ptr<XmlElement> XmlDocument::getDocumentElementIfTagMatches (StringRef requiredTag)
152 if (auto xml = getDocumentElement (true))
153 if (xml->hasTagName (requiredTag))
154 return getDocumentElement (false);
156 return {};
159 const String& XmlDocument::getLastParseError() const noexcept
161 return lastError;
164 void XmlDocument::setLastError (const String& desc, const bool carryOn)
166 lastError = desc;
167 errorOccurred = ! carryOn;
170 String XmlDocument::getFileContents (const String& filename) const
172 if (inputSource != nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
176 if (in != nullptr)
177 return in->readEntireStreamAsString();
180 return {};
183 juce_wchar XmlDocument::readNextChar() noexcept
185 auto c = input.getAndAdvance();
187 if (c == 0)
189 outOfData = true;
190 --input;
193 return c;
196 std::unique_ptr<XmlElement> XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
197 bool onlyReadOuterDocumentElement)
199 input = textToParse;
200 errorOccurred = false;
201 outOfData = false;
202 needToLoadDTD = true;
204 if (textToParse.isEmpty())
206 lastError = "not enough input";
208 else if (! parseHeader())
210 lastError = "malformed header";
212 else if (! parseDTD())
214 lastError = "malformed DTD";
216 else
218 lastError.clear();
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
221 if (! errorOccurred)
222 return result;
225 return {};
228 bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
232 if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0)
234 auto headerEnd = CharacterFunctions::find (input, CharPointer_ASCII ("?>"));
236 if (headerEnd.isEmpty())
237 return false;
239 #if JUCE_DEBUG
240 auto encoding = String (input, headerEnd)
241 .fromFirstOccurrenceOf ("encoding", false, true)
242 .fromFirstOccurrenceOf ("=", false, false)
243 .fromFirstOccurrenceOf ("\"", false, false)
244 .upToFirstOccurrenceOf ("\"", false, false)
245 .trim();
247 /* If you load an XML document with a non-UTF encoding type, it may have been
248 loaded wrongly.. Since all the files are read via the normal juce file streams,
249 they're treated as UTF-8, so by the time it gets to the parser, the encoding will
250 have been lost. Best plan is to stick to utf-8 or if you have specific files to
251 read, use your own code to convert them to a unicode String, and pass that to the
252 XML parser.
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
255 #endif
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
261 return true;
264 bool XmlDocument::parseDTD()
266 if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<!DOCTYPE"), 9) == 0)
268 input += 9;
269 auto dtdStart = input;
271 for (int n = 1; n > 0;)
273 auto c = readNextChar();
275 if (outOfData)
276 return false;
278 if (c == '<')
279 ++n;
280 else if (c == '>')
281 --n;
284 dtdText = String (dtdStart, input - 1).trim();
287 return true;
290 void XmlDocument::skipNextWhiteSpace()
292 for (;;)
294 input.incrementToEndOfWhitespace();
296 if (input.isEmpty())
298 outOfData = true;
299 break;
302 if (*input == '<')
304 if (input[1] == '!'
305 && input[2] == '-'
306 && input[3] == '-')
308 input += 4;
309 auto closeComment = input.indexOf (CharPointer_ASCII ("-->"));
311 if (closeComment < 0)
313 outOfData = true;
314 break;
317 input += closeComment + 3;
318 continue;
321 if (input[1] == '?')
323 input += 2;
324 auto closeBracket = input.indexOf (CharPointer_ASCII ("?>"));
326 if (closeBracket < 0)
328 outOfData = true;
329 break;
332 input += closeBracket + 2;
333 continue;
337 break;
341 void XmlDocument::readQuotedString (String& result)
343 auto quote = readNextChar();
345 while (! outOfData)
347 auto c = readNextChar();
349 if (c == quote)
350 break;
352 --input;
354 if (c == '&')
356 readEntity (result);
358 else
360 auto start = input;
362 for (;;)
364 auto character = *input;
366 if (character == quote)
368 result.appendCharPointer (start, input);
369 ++input;
370 return;
373 if (character == '&')
375 result.appendCharPointer (start, input);
376 break;
379 if (character == 0)
381 setLastError ("unmatched quotes", false);
382 outOfData = true;
383 break;
386 ++input;
392 XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
394 XmlElement* node = nullptr;
395 skipNextWhiteSpace();
397 if (outOfData)
398 return nullptr;
400 if (*input == '<')
402 ++input;
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
407 // no tag name - but allow for a gap after the '<' before giving an error
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError ("tag name missing", false);
414 return node;
418 node = new XmlElement (input, endOfToken);
419 input = endOfToken;
420 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
422 // look for attributes
423 for (;;)
425 skipNextWhiteSpace();
426 auto c = *input;
428 // empty tag..
429 if (c == '/' && input[1] == '>')
431 input += 2;
432 break;
435 // parse the guts of the element..
436 if (c == '>')
438 ++input;
440 if (alsoParseSubElements)
441 readChildElements (*node);
443 break;
446 // get an attribute..
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
454 input = attNameEnd;
455 skipNextWhiteSpace();
457 if (readNextChar() == '=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar == '"' || nextChar == '\'')
464 auto* newAtt = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.append (newAtt);
467 continue;
470 else
472 setLastError ("expected '=' after attribute '"
473 + String (attNameStart, attNameEnd) + "'", false);
474 return node;
478 else
480 if (! outOfData)
481 setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
484 break;
488 return node;
491 void XmlDocument::readChildElements (XmlElement& parent)
493 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
495 for (;;)
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
500 if (outOfData)
502 setLastError ("unmatched tags", false);
503 break;
506 if (*input == '<')
508 auto c1 = input[1];
510 if (c1 == '/')
512 // our close tag..
513 auto closeTag = input.indexOf ((juce_wchar) '>');
515 if (closeTag >= 0)
516 input += closeTag + 1;
518 break;
521 if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
523 input += 9;
524 auto inputStart = input;
526 for (;;)
528 auto c0 = *input;
530 if (c0 == 0)
532 setLastError ("unterminated CDATA section", false);
533 outOfData = true;
534 break;
537 if (c0 == ']' && input[1] == ']' && input[2] == '>')
539 childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
540 input += 3;
541 break;
544 ++input;
547 else
549 // this is some other element, so parse and add it..
550 if (auto* n = readNextElement (true))
551 childAppender.append (n);
552 else
553 break;
556 else // must be a character block
558 input = preWhitespaceInput; // roll back to include the leading whitespace
559 MemoryOutputStream textElementContent;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
562 for (;;)
564 auto c = *input;
566 if (c == '<')
568 if (input[1] == '!' && input[2] == '-' && input[3] == '-')
570 input += 4;
571 auto closeComment = input.indexOf (CharPointer_ASCII ("-->"));
573 if (closeComment < 0)
575 setLastError ("unterminated comment", false);
576 outOfData = true;
577 return;
580 input += closeComment + 3;
581 continue;
584 break;
587 if (c == 0)
589 setLastError ("unmatched tags", false);
590 outOfData = true;
591 return;
594 if (c == '&')
596 String entity;
597 readEntity (entity);
599 if (entity.startsWithChar ('<') && entity [1] != 0)
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
604 input = entity.getCharPointer();
605 outOfData = false;
607 while (auto* n = readNextElement (true))
608 childAppender.append (n);
610 input = oldInput;
611 outOfData = oldOutOfData;
613 else
615 textElementContent << entity;
616 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
619 else
621 for (;; ++input)
623 auto nextChar = *input;
625 if (nextChar == '\r')
627 nextChar = '\n';
629 if (input[1] == '\n')
630 continue;
633 if (nextChar == '<' || nextChar == '&')
634 break;
636 if (nextChar == 0)
638 setLastError ("unmatched tags", false);
639 outOfData = true;
640 return;
643 textElementContent.appendUTF8Char (nextChar);
644 contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
649 if (contentShouldBeUsed)
650 childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
655 void XmlDocument::readEntity (String& result)
657 // skip over the ampersand
658 ++input;
660 if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("amp;"), 4) == 0)
662 input += 4;
663 result += '&';
665 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("quot;"), 5) == 0)
667 input += 5;
668 result += '"';
670 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("apos;"), 5) == 0)
672 input += 5;
673 result += '\'';
675 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("lt;"), 3) == 0)
677 input += 3;
678 result += '<';
680 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("gt;"), 3) == 0)
682 input += 3;
683 result += '>';
685 else if (*input == '#')
687 int64_t charCode = 0;
688 ++input;
690 if (*input == 'x' || *input == 'X')
692 ++input;
693 int numChars = 0;
695 while (input[0] != ';')
697 auto hexValue = CharacterFunctions::getHexDigitValue (input[0]);
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError ("illegal escape sequence", true);
702 break;
705 charCode = (charCode << 4) | hexValue;
706 ++input;
709 ++input;
711 else if (input[0] >= '0' && input[0] <= '9')
713 int numChars = 0;
715 for (;;)
717 const auto firstChar = input[0];
719 if (firstChar == 0)
721 setLastError ("unexpected end of input", true);
722 return;
725 if (firstChar == ';')
726 break;
728 if (++numChars > 12)
730 setLastError ("illegal escape sequence", true);
731 break;
734 charCode = charCode * 10 + ((int) firstChar - '0');
735 ++input;
738 ++input;
740 else
742 setLastError ("illegal escape sequence", true);
743 result += '&';
744 return;
747 result << (juce_wchar) charCode;
749 else
751 auto entityNameStart = input;
752 auto closingSemiColon = input.indexOf ((juce_wchar) ';');
754 if (closingSemiColon < 0)
756 outOfData = true;
757 result += '&';
759 else
761 input += closingSemiColon + 1;
762 result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
767 String XmlDocument::expandEntity (const String& ent)
769 if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
770 if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
771 if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
772 if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
773 if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
775 if (ent[0] == '#')
777 auto char1 = ent[1];
779 if (char1 == 'x' || char1 == 'X')
780 return String::charToString (static_cast<juce_wchar> (ent.substring (2).getHexValue32()));
782 if (char1 >= '0' && char1 <= '9')
783 return String::charToString (static_cast<juce_wchar> (ent.substring (1).getIntValue()));
785 setLastError ("illegal escape sequence", false);
786 return String::charToString ('&');
789 return expandExternalEntity (ent);
792 String XmlDocument::expandExternalEntity (const String& entity)
794 if (needToLoadDTD)
796 if (dtdText.isNotEmpty())
798 dtdText = dtdText.trimCharactersAtEnd (">");
799 tokenisedDTD.addTokens (dtdText, true);
801 if (tokenisedDTD[tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
802 && tokenisedDTD[tokenisedDTD.size() - 1].isQuotedString())
804 auto fn = tokenisedDTD[tokenisedDTD.size() - 1];
806 tokenisedDTD.clear();
807 tokenisedDTD.addTokens (getFileContents (fn), true);
809 else
811 tokenisedDTD.clear();
812 auto openBracket = dtdText.indexOfChar ('[');
814 if (openBracket > 0)
816 auto closeBracket = dtdText.lastIndexOfChar (']');
818 if (closeBracket > openBracket)
819 tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
820 closeBracket), true);
824 for (int i = tokenisedDTD.size(); --i >= 0;)
826 if (tokenisedDTD[i].startsWithChar ('%')
827 && tokenisedDTD[i].endsWithChar (';'))
829 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
830 StringArray newToks;
831 newToks.addTokens (parsed, true);
833 tokenisedDTD.remove (i);
835 for (int j = newToks.size(); --j >= 0;)
836 tokenisedDTD.insert (i, newToks[j]);
841 needToLoadDTD = false;
844 for (int i = 0; i < tokenisedDTD.size(); ++i)
846 if (tokenisedDTD[i] == entity)
848 if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
850 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted();
852 // check for sub-entities..
853 auto ampersand = ent.indexOfChar ('&');
855 while (ampersand >= 0)
857 auto semiColon = ent.indexOf (i + 1, ";");
859 if (semiColon < 0)
861 setLastError ("entity without terminating semi-colon", false);
862 break;
865 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
867 ent = ent.substring (0, ampersand)
868 + resolved
869 + ent.substring (semiColon + 1);
871 ampersand = ent.indexOfChar (semiColon + 1, '&');
874 return ent;
879 setLastError ("unknown entity", true);
880 return entity;
883 String XmlDocument::getParameterEntity (const String& entity)
885 for (int i = 0; i < tokenisedDTD.size(); ++i)
887 if (tokenisedDTD[i] == entity
888 && tokenisedDTD [i - 1] == "%"
889 && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
891 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (">");
893 if (ent.equalsIgnoreCase ("system"))
894 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
896 return ent.trim().unquoted();
900 return entity;