source/modules/water/xml/XmlDocument.cpp

   1 /*
   2   ==============================================================================
   3
   4    This file is part of the Water library.
   5    Copyright (c) 2016 ROLI Ltd.
   6    Copyright (C) 2017-2022 Filipe Coelho <falktx@falktx.com>
   7
   8    Permission is granted to use this software under the terms of the ISC license
   9    http://www.isc.org/downloads/software-support-policy/isc-license/
  10
  11    Permission to use, copy, modify, and/or distribute this software for any
  12    purpose with or without fee is hereby granted, provided that the above
  13    copyright notice and this permission notice appear in all copies.
  14
  15    THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
  16    TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  17    FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
  18    OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
  19    USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  20    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  21    OF THIS SOFTWARE.
  22
  23   ==============================================================================
  24 */
  25
  26 #include "XmlDocument.h"
  27 #include "XmlElement.h"
  28 #include "../containers/LinkedListPointer.h"
  29 #include "../streams/FileInputSource.h"
  30 #include "../streams/InputStream.h"
  31 #include "../streams/MemoryOutputStream.h"
  32
  33 namespace water {
  34
  35 XmlDocument::XmlDocument (const String& documentText)
  36     : originalText (documentText),
  37       input (nullptr),
  38       outOfData (false),
  39       errorOccurred (false),
  40       needToLoadDTD (false),
  41       ignoreEmptyTextElements (true)
  42 {
  43 }
  44
  45 XmlDocument::XmlDocument (const File& file)
  46     : input (nullptr),
  47       outOfData (false),
  48       errorOccurred (false),
  49       needToLoadDTD (false),
  50       ignoreEmptyTextElements (true),
  51       inputSource (new FileInputSource (file))
  52 {
  53 }
  54
  55 XmlDocument::~XmlDocument()
  56 {
  57 }
  58
  59 XmlElement* XmlDocument::parse (const File& file)
  60 {
  61     XmlDocument doc (file);
  62     return doc.getDocumentElement();
  63 }
  64
  65 XmlElement* XmlDocument::parse (const String& xmlData)
  66 {
  67     XmlDocument doc (xmlData);
  68     return doc.getDocumentElement();
  69 }
  70
  71 void XmlDocument::setInputSource (FileInputSource* const newSource) noexcept
  72 {
  73     inputSource = newSource;
  74 }
  75
  76 void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  77 {
  78     ignoreEmptyTextElements = shouldBeIgnored;
  79 }
  80
  81 namespace XmlIdentifierChars
  82 {
  83     static bool isIdentifierCharSlow (const water_uchar c) noexcept
  84     {
  85         return CharacterFunctions::isLetterOrDigit (c)
  86                  || c == '_' || c == '-' || c == ':' || c == '.';
  87     }
  88
  89     static bool isIdentifierChar (const water_uchar c) noexcept
  90     {
  91         static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  92
  93         return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  94                                                                       : isIdentifierCharSlow (c);
  95     }
  96
  97     /*static void generateIdentifierCharConstants()
  98     {
  99         uint32 n[8] = { 0 };
 100         for (int i = 0; i < 256; ++i)
 101             if (isIdentifierCharSlow (i))
 102                 n[i >> 5] |= (1 << (i & 31));
 103
 104         String s;
 105         for (int i = 0; i < 8; ++i)
 106             s << "0x" << String::toHexString ((int) n[i]) << ", ";
 107
 108         DBG (s);
 109     }*/
 110
 111     static CharPointer_UTF8 findEndOfToken (CharPointer_UTF8 p)
 112     {
 113         while (isIdentifierChar (*p))
 114             ++p;
 115
 116         return p;
 117     }
 118 }
 119
 120 XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
 121 {
 122     if (originalText.isEmpty() && inputSource != nullptr)
 123     {
 124         CarlaScopedPointer<InputStream> in (inputSource->createInputStream());
 125
 126         if (in != nullptr)
 127         {
 128             MemoryOutputStream data;
 129             data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
 130
 131             if (data.getDataSize() > 2)
 132             {
 133                 data.writeByte (0);
 134                 const char* text = static_cast<const char*> (data.getData());
 135
 136                 if (CharPointer_UTF8::isByteOrderMark (text))
 137                     text += 3;
 138
 139                 // parse the input buffer directly to avoid copying it all to a string..
 140                 return parseDocumentElement (CharPointer_UTF8 (text), onlyReadOuterDocumentElement);
 141             }
 142         }
 143     }
 144
 145     return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
 146 }
 147
 148 const String& XmlDocument::getLastParseError() const noexcept
 149 {
 150     return lastError;
 151 }
 152
 153 void XmlDocument::setLastError (const String& desc, const bool carryOn)
 154 {
 155     lastError = desc;
 156     errorOccurred = ! carryOn;
 157 }
 158
 159 String XmlDocument::getFileContents (const String& filename) const
 160 {
 161     if (inputSource != nullptr)
 162     {
 163         const CarlaScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
 164
 165         if (in != nullptr)
 166             return in->readEntireStreamAsString();
 167     }
 168
 169     return String();
 170 }
 171
 172 water_uchar XmlDocument::readNextChar() noexcept
 173 {
 174     const water_uchar c = input.getAndAdvance();
 175
 176     if (c == 0)
 177     {
 178         outOfData = true;
 179         --input;
 180     }
 181
 182     return c;
 183 }
 184
 185 XmlElement* XmlDocument::parseDocumentElement (CharPointer_UTF8 textToParse,
 186                                                const bool onlyReadOuterDocumentElement)
 187 {
 188     input = textToParse;
 189     errorOccurred = false;
 190     outOfData = false;
 191     needToLoadDTD = true;
 192
 193     if (textToParse.isEmpty())
 194     {
 195         lastError = "not enough input";
 196     }
 197     else if (! parseHeader())
 198     {
 199         lastError = "malformed header";
 200     }
 201     else if (! parseDTD())
 202     {
 203         lastError = "malformed DTD";
 204     }
 205     else
 206     {
 207         lastError.clear();
 208
 209         CarlaScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
 210
 211         if (! errorOccurred)
 212             return result.release();
 213     }
 214
 215     return nullptr;
 216 }
 217
 218 bool XmlDocument::parseHeader()
 219 {
 220     skipNextWhiteSpace();
 221
 222     if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<?xml"), 5) == 0)
 223     {
 224         const CharPointer_UTF8 headerEnd (CharacterFunctions::find (input, CharPointer_UTF8 ("?>")));
 225
 226         if (headerEnd.isEmpty())
 227             return false;
 228
 229         const String encoding (String (input, headerEnd)
 230                                  .fromFirstOccurrenceOf ("encoding", false, true)
 231                                  .fromFirstOccurrenceOf ("=", false, false)
 232                                  .fromFirstOccurrenceOf ("\"", false, false)
 233                                  .upToFirstOccurrenceOf ("\"", false, false).trim());
 234
 235         /* If you load an XML document with a non-UTF encoding type, it may have been
 236            loaded wrongly.. Since all the files are read via the normal water file streams,
 237            they're treated as UTF-8, so by the time it gets to the parser, the encoding will
 238            have been lost. Best plan is to stick to utf-8 or if you have specific files to
 239            read, use your own code to convert them to a unicode String, and pass that to the
 240            XML parser.
 241         */
 242         CARLA_SAFE_ASSERT_RETURN (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"), false);
 243
 244         input = headerEnd + 2;
 245         skipNextWhiteSpace();
 246     }
 247
 248     return true;
 249 }
 250
 251 bool XmlDocument::parseDTD()
 252 {
 253     if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<!DOCTYPE"), 9) == 0)
 254     {
 255         input += 9;
 256         const CharPointer_UTF8 dtdStart (input);
 257
 258         for (int n = 1; n > 0;)
 259         {
 260             const water_uchar c = readNextChar();
 261
 262             if (outOfData)
 263                 return false;
 264
 265             if (c == '<')
 266                 ++n;
 267             else if (c == '>')
 268                 --n;
 269         }
 270
 271         dtdText = String (dtdStart, input - 1).trim();
 272     }
 273
 274     return true;
 275 }
 276
 277 void XmlDocument::skipNextWhiteSpace()
 278 {
 279     for (;;)
 280     {
 281         input = input.findEndOfWhitespace();
 282
 283         if (input.isEmpty())
 284         {
 285             outOfData = true;
 286             break;
 287         }
 288
 289         if (*input == '<')
 290         {
 291             if (input[1] == '!'
 292                  && input[2] == '-'
 293                  && input[3] == '-')
 294             {
 295                 input += 4;
 296                 const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
 297
 298                 if (closeComment < 0)
 299                 {
 300                     outOfData = true;
 301                     break;
 302                 }
 303
 304                 input += closeComment + 3;
 305                 continue;
 306             }
 307
 308             if (input[1] == '?')
 309             {
 310                 input += 2;
 311                 const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
 312
 313                 if (closeBracket < 0)
 314                 {
 315                     outOfData = true;
 316                     break;
 317                 }
 318
 319                 input += closeBracket + 2;
 320                 continue;
 321             }
 322         }
 323
 324         break;
 325     }
 326 }
 327
 328 void XmlDocument::readQuotedString (String& result)
 329 {
 330     const water_uchar quote = readNextChar();
 331
 332     while (! outOfData)
 333     {
 334         const water_uchar c = readNextChar();
 335
 336         if (c == quote)
 337             break;
 338
 339         --input;
 340
 341         if (c == '&')
 342         {
 343             readEntity (result);
 344         }
 345         else
 346         {
 347             const CharPointer_UTF8 start (input);
 348
 349             for (;;)
 350             {
 351                 const water_uchar character = *input;
 352
 353                 if (character == quote)
 354                 {
 355                     result.appendCharPointer (start, input);
 356                     ++input;
 357                     return;
 358                 }
 359                 else if (character == '&')
 360                 {
 361                     result.appendCharPointer (start, input);
 362                     break;
 363                 }
 364                 else if (character == 0)
 365                 {
 366                     setLastError ("unmatched quotes", false);
 367                     outOfData = true;
 368                     break;
 369                 }
 370
 371                 ++input;
 372             }
 373         }
 374     }
 375 }
 376
 377 XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
 378 {
 379     XmlElement* node = nullptr;
 380
 381     skipNextWhiteSpace();
 382     if (outOfData)
 383         return nullptr;
 384
 385     if (*input == '<')
 386     {
 387         ++input;
 388         CharPointer_UTF8 endOfToken (XmlIdentifierChars::findEndOfToken (input));
 389
 390         if (endOfToken == input)
 391         {
 392             // no tag name - but allow for a gap after the '<' before giving an error
 393             skipNextWhiteSpace();
 394             endOfToken = XmlIdentifierChars::findEndOfToken (input);
 395
 396             if (endOfToken == input)
 397             {
 398                 setLastError ("tag name missing", false);
 399                 return node;
 400             }
 401         }
 402
 403         node = new XmlElement (input, endOfToken);
 404         input = endOfToken;
 405         LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
 406
 407         // look for attributes
 408         for (;;)
 409         {
 410             skipNextWhiteSpace();
 411
 412             const water_uchar c = *input;
 413
 414             // empty tag..
 415             if (c == '/' && input[1] == '>')
 416             {
 417                 input += 2;
 418                 break;
 419             }
 420
 421             // parse the guts of the element..
 422             if (c == '>')
 423             {
 424                 ++input;
 425
 426                 if (alsoParseSubElements)
 427                     readChildElements (*node);
 428
 429                 break;
 430             }
 431
 432             // get an attribute..
 433             if (XmlIdentifierChars::isIdentifierChar (c))
 434             {
 435                 CharPointer_UTF8 attNameEnd (XmlIdentifierChars::findEndOfToken (input));
 436
 437                 if (attNameEnd != input)
 438                 {
 439                     const CharPointer_UTF8 attNameStart (input);
 440                     input = attNameEnd;
 441
 442                     skipNextWhiteSpace();
 443
 444                     if (readNextChar() == '=')
 445                     {
 446                         skipNextWhiteSpace();
 447
 448                         const water_uchar nextChar = *input;
 449
 450                         if (nextChar == '"' || nextChar == '\'')
 451                         {
 452                             XmlElement::XmlAttributeNode* const newAtt
 453                                 = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
 454
 455                             readQuotedString (newAtt->value);
 456                             attributeAppender.append (newAtt);
 457                             continue;
 458                         }
 459                     }
 460                     else
 461                     {
 462                         setLastError ("expected '=' after attribute '"
 463                                         + String (attNameStart, attNameEnd) + "'", false);
 464                         return node;
 465                     }
 466                 }
 467             }
 468             else
 469             {
 470                 if (! outOfData)
 471                     setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
 472             }
 473
 474             break;
 475         }
 476     }
 477
 478     return node;
 479 }
 480
// Parses the content of an element - child elements, CDATA sections and text -
// and appends each item to parent, stopping after the element's closing tag.
//
// Errors are reported through setLastError(); running out of text before the
// closing tag is reported as "unmatched tags".
void XmlDocument::readChildElements (XmlElement& parent)
{
    LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);

    for (;;)
    {
        // remember where the whitespace started, in case this turns out to be a text block
        const CharPointer_UTF8 preWhitespaceInput (input);
        skipNextWhiteSpace();

        if (outOfData)
        {
            setLastError ("unmatched tags", false);
            break;
        }

        if (*input == '<')
        {
            const water_uchar c1 = input[1];

            if (c1 == '/')
            {
                // our close tag..
                // (the tag name is not compared with the parent's; everything up to '>' is skipped)
                const int closeTag = input.indexOf ((water_uchar) '>');

                if (closeTag >= 0)
                    input += closeTag + 1;

                break;
            }

            if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_UTF8 ("[CDATA["), 7) == 0)
            {
                // skip the 9 characters of "<![CDATA["
                input += 9;
                const CharPointer_UTF8 inputStart (input);

                // scan for the "]]>" terminator; the content is taken verbatim
                for (;;)
                {
                    const water_uchar c0 = *input;

                    if (c0 == 0)
                    {
                        setLastError ("unterminated CDATA section", false);
                        outOfData = true;
                        break;
                    }
                    else if (c0 == ']'
                              && input[1] == ']'
                              && input[2] == '>')
                    {
                        childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
                        input += 3;
                        break;
                    }

                    ++input;
                }
            }
            else
            {
                // this is some other element, so parse and add it..
                if (XmlElement* const n = readNextElement (true))
                    childAppender.append (n);
                else
                    break;
            }
        }
        else  // must be a character block
        {
            input = preWhitespaceInput; // roll back to include the leading whitespace
            MemoryOutputStream textElementContent;
            // whitespace-only text is kept only when empty text elements are not being ignored
            bool contentShouldBeUsed = ! ignoreEmptyTextElements;

            for (;;)
            {
                const water_uchar c = *input;

                if (c == '<')
                {
                    // a comment inside the text is skipped and the text block continues
                    if (input[1] == '!' && input[2] == '-' && input[3] == '-')
                    {
                        input += 4;
                        const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));

                        if (closeComment < 0)
                        {
                            setLastError ("unterminated comment", false);
                            outOfData = true;
                            return;
                        }

                        input += closeComment + 3;
                        continue;
                    }

                    // any other tag ends the text block
                    break;
                }

                if (c == 0)
                {
                    setLastError ("unmatched tags", false);
                    outOfData = true;
                    return;
                }

                if (c == '&')
                {
                    String entity;
                    readEntity (entity);

                    if (entity.startsWithChar ('<') && entity [1] != 0)
                    {
                        // the entity expanded to markup: parse it as elements by temporarily
                        // redirecting the read position into the expanded text
                        const CharPointer_UTF8 oldInput (input);
                        const bool oldOutOfData = outOfData;

                        input = entity.getCharPointer();
                        outOfData = false;

                        while (XmlElement* n = readNextElement (true))
                            childAppender.append (n);

                        // restore the read position and end-of-data flag of the real document
                        input = oldInput;
                        outOfData = oldOutOfData;
                    }
                    else
                    {
                        textElementContent << entity;
                        contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
                    }
                }
                else
                {
                    // copy plain characters up to the next '<' or '&'
                    for (;; ++input)
                    {
                        water_uchar nextChar = *input;

                        if (nextChar == '\r')
                        {
                            // a lone CR is written as LF; the CR of a CR-LF pair is dropped
                            nextChar = '\n';

                            if (input[1] == '\n')
                                continue;
                        }

                        if (nextChar == '<' || nextChar == '&')
                            break;

                        if (nextChar == 0)
                        {
                            setLastError ("unmatched tags", false);
                            outOfData = true;
                            return;
                        }

                        textElementContent.appendUTF8Char (nextChar);
                        contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
                    }
                }
            }

            if (contentShouldBeUsed)
                childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
        }
    }
}
 645
 646 void XmlDocument::readEntity (String& result)
 647 {
 648     // skip over the ampersand
 649     ++input;
 650
 651     if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
 652     {
 653         input += 4;
 654         result += '&';
 655     }
 656     else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
 657     {
 658         input += 5;
 659         result += '"';
 660     }
 661     else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
 662     {
 663         input += 5;
 664         result += '\'';
 665     }
 666     else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
 667     {
 668         input += 3;
 669         result += '<';
 670     }
 671     else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
 672     {
 673         input += 3;
 674         result += '>';
 675     }
 676     else if (*input == '#')
 677     {
 678         int charCode = 0;
 679         ++input;
 680
 681         if (*input == 'x' || *input == 'X')
 682         {
 683             ++input;
 684             int numChars = 0;
 685
 686             while (input[0] != ';')
 687             {
 688                 const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
 689
 690                 if (hexValue < 0 || ++numChars > 8)
 691                 {
 692                     setLastError ("illegal escape sequence", true);
 693                     break;
 694                 }
 695
 696                 charCode = (charCode << 4) | hexValue;
 697                 ++input;
 698             }
 699
 700             ++input;
 701         }
 702         else if (input[0] >= '0' && input[0] <= '9')
 703         {
 704             int numChars = 0;
 705
 706             while (input[0] != ';')
 707             {
 708                 if (++numChars > 12)
 709                 {
 710                     setLastError ("illegal escape sequence", true);
 711                     break;
 712                 }
 713
 714                 charCode = charCode * 10 + ((int) input[0] - '0');
 715                 ++input;
 716             }
 717
 718             ++input;
 719         }
 720         else
 721         {
 722             setLastError ("illegal escape sequence", true);
 723             result += '&';
 724             return;
 725         }
 726
 727         result << (water_uchar) charCode;
 728     }
 729     else
 730     {
 731         const CharPointer_UTF8 entityNameStart (input);
 732         const int closingSemiColon = input.indexOf ((water_uchar) ';');
 733
 734         if (closingSemiColon < 0)
 735         {
 736             outOfData = true;
 737             result += '&';
 738         }
 739         else
 740         {
 741             input += closingSemiColon + 1;
 742
 743             result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
 744         }
 745     }
 746 }
 747
 748 String XmlDocument::expandEntity (const String& ent)
 749 {
 750     if (ent.equalsIgnoreCase ("amp"))   return String::charToString ('&');
 751     if (ent.equalsIgnoreCase ("quot"))  return String::charToString ('"');
 752     if (ent.equalsIgnoreCase ("apos"))  return String::charToString ('\'');
 753     if (ent.equalsIgnoreCase ("lt"))    return String::charToString ('<');
 754     if (ent.equalsIgnoreCase ("gt"))    return String::charToString ('>');
 755
 756     if (ent[0] == '#')
 757     {
 758         const water_uchar char1 = ent[1];
 759
 760         if (char1 == 'x' || char1 == 'X')
 761             return String::charToString (static_cast<water_uchar> (ent.substring (2).getHexValue32()));
 762
 763         if (char1 >= '0' && char1 <= '9')
 764             return String::charToString (static_cast<water_uchar> (ent.substring (1).getIntValue()));
 765
 766         setLastError ("illegal escape sequence", false);
 767         return String::charToString ('&');
 768     }
 769
 770     return expandExternalEntity (ent);
 771 }
 772
 773 String XmlDocument::expandExternalEntity (const String& entity)
 774 {
 775     if (needToLoadDTD)
 776     {
 777         if (dtdText.isNotEmpty())
 778         {
 779             dtdText = dtdText.trimCharactersAtEnd (">");
 780             tokenisedDTD.addTokens (dtdText, true);
 781
 782             if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
 783                  && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
 784             {
 785                 const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
 786
 787                 tokenisedDTD.clear();
 788                 tokenisedDTD.addTokens (getFileContents (fn), true);
 789             }
 790             else
 791             {
 792                 tokenisedDTD.clear();
 793                 const int openBracket = dtdText.indexOfChar ('[');
 794
 795                 if (openBracket > 0)
 796                 {
 797                     const int closeBracket = dtdText.lastIndexOfChar (']');
 798
 799                     if (closeBracket > openBracket)
 800                         tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
 801                                                                    closeBracket), true);
 802                 }
 803             }
 804
 805             for (int i = tokenisedDTD.size(); --i >= 0;)
 806             {
 807                 if (tokenisedDTD[i].startsWithChar ('%')
 808                      && tokenisedDTD[i].endsWithChar (';'))
 809                 {
 810                     const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
 811                     StringArray newToks;
 812                     newToks.addTokens (parsed, true);
 813
 814                     tokenisedDTD.remove (i);
 815
 816                     for (int j = newToks.size(); --j >= 0;)
 817                         tokenisedDTD.insert (i, newToks[j]);
 818                 }
 819             }
 820         }
 821
 822         needToLoadDTD = false;
 823     }
 824
 825     for (int i = 0; i < tokenisedDTD.size(); ++i)
 826     {
 827         if (tokenisedDTD[i] == entity)
 828         {
 829             if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
 830             {
 831                 String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
 832
 833                 // check for sub-entities..
 834                 int ampersand = ent.indexOfChar ('&');
 835
 836                 while (ampersand >= 0)
 837                 {
 838                     const int semiColon = ent.indexOf (i + 1, ";");
 839
 840                     if (semiColon < 0)
 841                     {
 842                         setLastError ("entity without terminating semi-colon", false);
 843                         break;
 844                     }
 845
 846                     const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
 847
 848                     ent = ent.substring (0, ampersand)
 849                            + resolved
 850                            + ent.substring (semiColon + 1);
 851
 852                     ampersand = ent.indexOfChar (semiColon + 1, '&');
 853                 }
 854
 855                 return ent;
 856             }
 857         }
 858     }
 859
 860     setLastError ("unknown entity", true);
 861
 862     return entity;
 863 }
 864
 865 String XmlDocument::getParameterEntity (const String& entity)
 866 {
 867     for (int i = 0; i < tokenisedDTD.size(); ++i)
 868     {
 869         if (tokenisedDTD[i] == entity
 870              && tokenisedDTD [i - 1] == "%"
 871              && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
 872         {
 873             const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
 874
 875             if (ent.equalsIgnoreCase ("system"))
 876                 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
 877
 878             return ent.trim().unquoted();
 879         }
 880     }
 881
 882     return entity;
 883 }
 884
 885 }