Merge pull request #25959 from neo1973/TagLib_deprecation_warnings
[xbmc.git] / lib / libUPnP / Neptune / Source / Core / NptXml.cpp
blobd6b95cad8c11fb9438afbed2fae4f1dfb0f2a25d
1 /*****************************************************************
3 | Neptune - Xml Support
5 | Copyright (c) 2002-2008, Axiomatic Systems, LLC.
6 | All rights reserved.
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions are met:
10 | * Redistributions of source code must retain the above copyright
11 | notice, this list of conditions and the following disclaimer.
12 | * Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | * Neither the name of Axiomatic Systems nor the
16 | names of its contributors may be used to endorse or promote products
17 | derived from this software without specific prior written permission.
19 | THIS SOFTWARE IS PROVIDED BY AXIOMATIC SYSTEMS ''AS IS'' AND ANY
20 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL AXIOMATIC SYSTEMS BE LIABLE FOR ANY
23 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 ****************************************************************/
32 /*----------------------------------------------------------------------
33 | includes
34 +---------------------------------------------------------------------*/
35 #include "NptConfig.h"
36 #include "NptTypes.h"
37 #include "NptXml.h"
38 #include "NptUtils.h"
39 #include "NptMap.h"
40 #include "NptDebug.h"
42 /*----------------------------------------------------------------------
43 | local compilation flags
44 +---------------------------------------------------------------------*/
// Parser tracing switch: NPT_XML_PARSER_DEBUG is left undefined here (the
// define below is commented out), which selects the empty macro set.
45 //#define NPT_XML_PARSER_DEBUG
46 #ifdef NPT_XML_PARSER_DEBUG
// tracing enabled: NPT_XML_Debug_<N> forwards a format string and N arguments
// to NPT_Debug
47 #define NPT_XML_Debug_0(s) NPT_Debug(s)
48 #define NPT_XML_Debug_1(s,x0) NPT_Debug(s,x0)
49 #define NPT_XML_Debug_2(s,x0,x1) NPT_Debug(s,x0,x1)
50 #define NPT_XML_Debug_3(s,x0,x1,x2) NPT_Debug(s,x0,x1,x2)
51 #define NPT_XML_Debug_4(s,x0,x1,x2,x3) NPT_Debug(s,x0,x1,x2,x3)
52 #else
// tracing disabled: every NPT_XML_Debug_<N> expands to nothing, so the
// arguments are never evaluated
53 #define NPT_XML_Debug_0(s)
54 #define NPT_XML_Debug_1(s,x0)
55 #define NPT_XML_Debug_2(s,x0,x1)
56 #define NPT_XML_Debug_3(s,x0,x1,x2)
57 #define NPT_XML_Debug_4(s,x0,x1,x2,x3)
58 #endif
60 /*----------------------------------------------------------------------
61 | constants
62 +---------------------------------------------------------------------*/
// Namespace URI handed out for the reserved "xml" prefix when an element
// lookup reaches the top of the namespace chain without finding a binding.
63 static const NPT_String
64 NPT_XmlNamespaceUri_Xml("http://www.w3.org/XML/1998/namespace");
66 /*----------------------------------------------------------------------
67 | NPT_XmlAttributeFinder
68 +---------------------------------------------------------------------*/
69 class NPT_XmlAttributeFinder
71 public:
72 // if 'namespc' is NULL, we're looking for ANY namespace
73 // if 'namespc' is '\0', we're looking for NO namespace
74 // if 'namespc' is non-empty, look for that SPECIFIC namespace
75 NPT_XmlAttributeFinder(const NPT_XmlElementNode& element,
76 const char* name,
77 const char* namespc) :
78 m_Element(element), m_Name(name), m_Namespace(namespc) {}
80 bool operator()(const NPT_XmlAttribute* const & attribute) const {
81 if (attribute->m_Name == m_Name) {
82 if (m_Namespace) {
83 const NPT_String& prefix = attribute->GetPrefix();
84 if (m_Namespace[0] == '\0') {
85 // match if the attribute has NO namespace
86 return prefix.IsEmpty();
87 } else {
88 // match if the attribute has the SPECIFIC namespace
89 // we're looking for
90 if (prefix.IsEmpty()) {
91 // attributes without a prefix don't have a namespace
92 return false;
93 } else {
94 const NPT_String* namespc = m_Element.GetNamespaceUri(prefix);
95 return namespc && *namespc == m_Namespace;
98 } else {
99 // ANY namespace will match
100 return true;
102 } else {
103 return false;
107 private:
108 const NPT_XmlElementNode& m_Element;
109 const char* m_Name;
110 const char* m_Namespace;
113 /*----------------------------------------------------------------------
114 | NPT_XmlAttributeFinderWithPrefix
115 +---------------------------------------------------------------------*/
116 class NPT_XmlAttributeFinderWithPrefix
118 public:
119 NPT_XmlAttributeFinderWithPrefix(const char* prefix, const char* name) :
120 m_Prefix(prefix?prefix:""), m_Name(name) {}
122 bool operator()(const NPT_XmlAttribute* const & attribute) const {
123 return attribute->m_Prefix == m_Prefix && attribute->m_Name == m_Name;
126 private:
127 const char* m_Prefix;
128 const char* m_Name;
131 /*----------------------------------------------------------------------
132 | NPT_XmlTagFinder
133 +---------------------------------------------------------------------*/
134 class NPT_XmlTagFinder
136 public:
137 // if 'namespc' is NULL, we're looking for ANY namespace
138 // if 'namespc' is '\0', we're looking for NO namespace
139 // if 'namespc' is non-empty, look for that SPECIFIC namespace
140 NPT_XmlTagFinder(const char* tag, const char* namespc) :
141 m_Tag(tag), m_Namespace(namespc) {}
143 bool operator()(const NPT_XmlNode* const & node) const {
144 const NPT_XmlElementNode* element = node->AsElementNode();
145 if (element && element->m_Tag == m_Tag) {
146 if (m_Namespace) {
147 // look for a SPECIFIC namespace or NO namespace
148 const NPT_String* namespc = element->GetNamespace();
149 if (namespc) {
150 // the element has a namespace, match if it is equal to
151 // what we're looking for
152 return *namespc == m_Namespace;
153 } else {
154 // the element does not have a namespace, match if we're
155 // looking for NO namespace
156 return m_Namespace[0] == '\0';
158 } else {
159 // ANY namespace will match
160 return true;
162 } else {
163 return false;
167 private:
168 const char* m_Tag;
169 const char* m_Namespace;
172 /*----------------------------------------------------------------------
173 | NPT_XmlTextFinder
174 +---------------------------------------------------------------------*/
175 class NPT_XmlTextFinder
177 public:
178 bool operator()(const NPT_XmlNode* const & node) const {
179 return node->AsTextNode() != NULL;
183 /*----------------------------------------------------------------------
184 | NPT_XmlNamespaceCollapser
185 +---------------------------------------------------------------------*/
186 class NPT_XmlNamespaceCollapser
188 public:
189 NPT_XmlNamespaceCollapser(NPT_XmlElementNode* element) :
190 m_Root(element) {}
192 void operator()(NPT_XmlNode*& node) const {
193 NPT_XmlElementNode* element = node->AsElementNode();
194 if (element == NULL) return;
196 // collapse the namespace for this element
197 CollapseNamespace(element, element->GetPrefix());
199 // collapse the namespaces for the attributes
200 NPT_List<NPT_XmlAttribute*>::Iterator item = element->GetAttributes().GetFirstItem();
201 while (item) {
202 NPT_XmlAttribute* attribute = *item;
203 CollapseNamespace(element, attribute->GetPrefix());
204 ++item;
207 // recurse to the children
208 element->GetChildren().Apply(*this);
211 private:
212 // methods
213 void CollapseNamespace(NPT_XmlElementNode* element, const NPT_String& prefix) const;
215 // members
216 NPT_XmlElementNode* m_Root;
219 /*----------------------------------------------------------------------
220 | NPT_XmlNamespaceCollapser::CollapseNamespace
221 +---------------------------------------------------------------------*/
222 void
223 NPT_XmlNamespaceCollapser::CollapseNamespace(NPT_XmlElementNode* element,
224 const NPT_String& prefix) const
226 if (m_Root->m_NamespaceMap == NULL ||
227 (m_Root->m_NamespaceMap->GetNamespaceUri(prefix) == NULL && prefix != "xml")) {
228 // the root element does not have that prefix in the map
229 const NPT_String* uri = element->GetNamespaceUri(prefix);
230 if (uri) m_Root->SetNamespaceUri(prefix, uri->GetChars());
234 /*----------------------------------------------------------------------
235 | NPT_XmlAttribute::NPT_XmlAttribute
236 +---------------------------------------------------------------------*/
237 NPT_XmlAttribute::NPT_XmlAttribute(const char* name, const char* value) :
238 m_Value(value)
240 const char* cursor = name;
241 while (char c = *cursor++) {
242 if (c == ':') {
243 unsigned int prefix_length = (unsigned int)(cursor-name)-1;
244 m_Prefix.Assign(name, prefix_length);
245 name = cursor;
246 break;
249 m_Name = name;
252 /*----------------------------------------------------------------------
253 | NPT_XmlElementNode::NPT_XmlElementNode
254 +---------------------------------------------------------------------*/
255 NPT_XmlElementNode::NPT_XmlElementNode(const char* prefix, const char* tag) :
256 NPT_XmlNode(ELEMENT),
257 m_Prefix(prefix),
258 m_Tag(tag),
259 m_NamespaceMap(NULL),
260 m_NamespaceParent(NULL)
264 /*----------------------------------------------------------------------
265 | NPT_XmlElementNode::NPT_XmlElementNode
266 +---------------------------------------------------------------------*/
267 NPT_XmlElementNode::NPT_XmlElementNode(const char* tag) :
268 NPT_XmlNode(ELEMENT),
269 m_NamespaceMap(NULL),
270 m_NamespaceParent(NULL)
272 const char* cursor = tag;
273 while (char c = *cursor++) {
274 if (c == ':') {
275 unsigned int prefix_length = (unsigned int)(cursor-tag)-1;
276 m_Prefix.Assign(tag, prefix_length);
277 tag = cursor;
278 break;
281 m_Tag = tag;
284 /*----------------------------------------------------------------------
285 | NPT_XmlElementNode::~NPT_XmlElementNode
286 +---------------------------------------------------------------------*/
287 NPT_XmlElementNode::~NPT_XmlElementNode()
289 m_Children.Apply(NPT_ObjectDeleter<NPT_XmlNode>());
290 m_Attributes.Apply(NPT_ObjectDeleter<NPT_XmlAttribute>());
291 delete m_NamespaceMap;
294 /*----------------------------------------------------------------------
295 | NPT_XmlElementNode::SetParent
296 +---------------------------------------------------------------------*/
297 void
298 NPT_XmlElementNode::SetParent(NPT_XmlNode* parent)
300 // update our parent
301 m_Parent = parent;
303 // update out namespace linkage
304 NPT_XmlElementNode* parent_element =
305 parent?parent->AsElementNode():NULL;
306 NPT_XmlElementNode* namespace_parent;
307 if (parent_element) {
308 namespace_parent =
309 parent_element->m_NamespaceMap ?
310 parent_element:
311 parent_element->m_NamespaceParent;
312 } else {
313 namespace_parent = NULL;
315 if (namespace_parent != m_NamespaceParent) {
316 m_NamespaceParent = namespace_parent;
317 RelinkNamespaceMaps();
321 /*----------------------------------------------------------------------
322 | NPT_XmlElementNode::AddChild
323 +---------------------------------------------------------------------*/
324 NPT_Result
325 NPT_XmlElementNode::AddChild(NPT_XmlNode* child)
327 if (child == NULL) return NPT_ERROR_INVALID_PARAMETERS;
328 child->SetParent(this);
329 return m_Children.Add(child);
332 /*----------------------------------------------------------------------
333 | NPT_XmlElementNode::GetChild
334 +---------------------------------------------------------------------*/
335 NPT_XmlElementNode*
336 NPT_XmlElementNode::GetChild(const char* tag, const char* namespc, NPT_Ordinal n) const
338 // remap the requested namespace to match the semantics of the finder
339 // and allow for "" to also mean NO namespace
340 if (namespc == NULL || namespc[0] == '\0') {
341 namespc = ""; // for the finder, empty string means NO namespace
342 } else if (namespc[0] == '*' && namespc[1] == '\0') {
343 namespc = NULL; // for the finder, NULL means ANY namespace
346 // find the child
347 NPT_List<NPT_XmlNode*>::Iterator item;
348 item = m_Children.Find(NPT_XmlTagFinder(tag, namespc), n);
349 return item?(*item)->AsElementNode():NULL;
352 /*----------------------------------------------------------------------
353 | NPT_XmlElementNode::AddAttribute
354 +---------------------------------------------------------------------*/
355 NPT_Result
356 NPT_XmlElementNode::AddAttribute(const char* name,
357 const char* value)
359 if (name == NULL || value == NULL) return NPT_ERROR_INVALID_PARAMETERS;
360 return m_Attributes.Add(new NPT_XmlAttribute(name, value));
363 /*----------------------------------------------------------------------
364 | NPT_XmlElementNode::SetAttribute
365 +---------------------------------------------------------------------*/
366 NPT_Result
367 NPT_XmlElementNode::SetAttribute(const char* prefix,
368 const char* name,
369 const char* value)
371 if (name == NULL || value == NULL) return NPT_ERROR_INVALID_PARAMETERS;
373 /* see if this attribute is already set */
374 NPT_List<NPT_XmlAttribute*>::Iterator attribute;
375 attribute = m_Attributes.Find(NPT_XmlAttributeFinderWithPrefix(prefix, name));
376 if (attribute) {
377 // an attribute with this name and prefix already exists,
378 // change its value
379 (*attribute)->SetValue(value);
380 return NPT_SUCCESS;
382 return m_Attributes.Add(new NPT_XmlAttribute(prefix, name, value));
385 /*----------------------------------------------------------------------
386 | NPT_XmlElementNode::SetAttribute
387 +---------------------------------------------------------------------*/
388 NPT_Result
389 NPT_XmlElementNode::SetAttribute(const char* name, const char* value)
391 return SetAttribute(NULL, name, value);
394 /*----------------------------------------------------------------------
395 | NPT_XmlElementNode::GetAttribute
396 +---------------------------------------------------------------------*/
397 const NPT_String*
398 NPT_XmlElementNode::GetAttribute(const char* name, const char* namespc) const
400 // remap the requested namespace to match the semantics of the finder
401 // and allow for "" to also mean NO namespace
402 if (namespc == NULL || namespc[0] == '\0') {
403 namespc = ""; // for the finder, empty string means NO namespace
404 } else if (namespc[0] == '*' && namespc[1] == '\0') {
405 namespc = NULL; // for the finder, NULL means ANY namespace
408 // find the attribute
409 NPT_List<NPT_XmlAttribute*>::Iterator attribute;
410 attribute = m_Attributes.Find(NPT_XmlAttributeFinder(*this, name, namespc));
411 if (attribute) {
412 return &(*attribute)->GetValue();
413 } else {
414 return NULL;
418 /*----------------------------------------------------------------------
419 | NPT_XmlElementNode::AddText
420 +---------------------------------------------------------------------*/
421 NPT_Result
422 NPT_XmlElementNode::AddText(const char* text)
424 return AddChild(new NPT_XmlTextNode(NPT_XmlTextNode::CHARACTER_DATA, text));
427 /*----------------------------------------------------------------------
428 | NPT_XmlElementNode::GetText
429 +---------------------------------------------------------------------*/
430 const NPT_String*
431 NPT_XmlElementNode::GetText(NPT_Ordinal n) const
433 NPT_List<NPT_XmlNode*>::Iterator node;
434 node = m_Children.Find(NPT_XmlTextFinder(), n);
435 return node?&(*node)->AsTextNode()->GetString():NULL;
438 /*----------------------------------------------------------------------
439 | NPT_XmlElementNode::MakeStandalone
440 +---------------------------------------------------------------------*/
441 NPT_Result
442 NPT_XmlElementNode::MakeStandalone()
444 NPT_XmlNamespaceCollapser collapser(this);
445 NPT_XmlNode* node_pointer = this;
446 collapser(node_pointer);
448 return NPT_SUCCESS;
451 /*----------------------------------------------------------------------
452 | NPT_XmlElementNode::RelinkNamespaceMaps
453 +---------------------------------------------------------------------*/
454 void
455 NPT_XmlElementNode::RelinkNamespaceMaps()
457 // update our children so that they can inherit the right
458 // namespace map
459 NPT_List<NPT_XmlNode*>::Iterator item = m_Children.GetFirstItem();
460 while (item) {
461 NPT_XmlElementNode* element = (*item)->AsElementNode();
462 if (element) {
463 if (m_NamespaceMap) {
464 // we have a map, so our children point to us
465 element->SetNamespaceParent(this);
466 } else {
467 // we don't have a map, so our children point to
468 // where we also point
469 element->SetNamespaceParent(m_NamespaceParent);
472 ++item;
476 /*----------------------------------------------------------------------
477 | NPT_XmlElementNode::SetNamespaceParent
478 +---------------------------------------------------------------------*/
479 void
480 NPT_XmlElementNode::SetNamespaceParent(NPT_XmlElementNode* parent)
482 m_NamespaceParent = parent;
483 RelinkNamespaceMaps();
486 /*----------------------------------------------------------------------
487 | NPT_XmlElementNode::SetNamespaceUri
488 +---------------------------------------------------------------------*/
489 NPT_Result
490 NPT_XmlElementNode::SetNamespaceUri(const char* prefix, const char* uri)
492 // ensure that we have a namespace map
493 if (m_NamespaceMap == NULL) {
494 m_NamespaceMap = new NPT_XmlNamespaceMap();
495 RelinkNamespaceMaps();
498 return m_NamespaceMap->SetNamespaceUri(prefix, uri);
501 /*----------------------------------------------------------------------
502 | NPT_XmlElementNode::GetNamespaceUri
503 +---------------------------------------------------------------------*/
504 const NPT_String*
505 NPT_XmlElementNode::GetNamespaceUri(const char* prefix) const
507 if (m_NamespaceMap) {
508 // look in our namespace map first
509 const NPT_String* namespc = m_NamespaceMap->GetNamespaceUri(prefix);
510 if (namespc) {
511 if (namespc->IsEmpty()) {
512 return NULL;
513 } else {
514 return namespc;
519 // look into our parent's namespace map
520 if (m_NamespaceParent) {
521 return m_NamespaceParent->GetNamespaceUri(prefix);
522 } else {
523 // check if this is a well-known namespace
524 if (prefix[0] == 'x' &&
525 prefix[1] == 'm' &&
526 prefix[2] == 'l' &&
527 prefix[3] == '\0') {
528 return &NPT_XmlNamespaceUri_Xml;
531 // not found
532 return NULL;
536 /*----------------------------------------------------------------------
537 | NPT_XmlElementNode::GetNamespace
538 +---------------------------------------------------------------------*/
539 const NPT_String*
540 NPT_XmlElementNode::GetNamespace() const
542 return GetNamespaceUri(m_Prefix);
545 /*----------------------------------------------------------------------
546 | NPT_XmlElementNode::GetNamespacePrefix
547 +---------------------------------------------------------------------*/
548 const NPT_String*
549 NPT_XmlElementNode::GetNamespacePrefix(const char* uri) const
551 NPT_XmlNamespaceMap* namespace_map =
552 m_NamespaceMap?
553 m_NamespaceMap:
554 (m_NamespaceParent?
555 m_NamespaceParent->m_NamespaceMap:
556 NULL);
558 if (namespace_map) {
559 return namespace_map->GetNamespacePrefix(uri);
560 } else {
561 return NULL;
565 /*----------------------------------------------------------------------
566 | NPT_XmlTextNode::NPT_XmlTextNode
567 +---------------------------------------------------------------------*/
568 NPT_XmlTextNode::NPT_XmlTextNode(TokenType token_type, const char* text) :
569 NPT_XmlNode(TEXT),
570 m_TokenType(token_type),
571 m_Text(text)
575 /*----------------------------------------------------------------------
576 | NPT_XmlAccumulator
577 +---------------------------------------------------------------------*/
578 class NPT_XmlAccumulator {
579 public:
580 NPT_XmlAccumulator();
581 ~NPT_XmlAccumulator();
582 void Append(char c);
583 void Append(const char* s);
584 void AppendUTF8(unsigned int c);
585 void Reset() { m_Valid = 0; }
586 const char* GetString();
587 NPT_Size GetSize() const { return m_Valid; }
588 const unsigned char* GetBuffer() const { return m_Buffer; }
590 private:
591 // methods
592 void Allocate(NPT_Size size);
594 // members
595 unsigned char* m_Buffer;
596 NPT_Size m_Allocated;
597 NPT_Size m_Valid;
600 /*----------------------------------------------------------------------
601 | NPT_XmlAccumulator::NPT_XmlAccumulator
602 +---------------------------------------------------------------------*/
603 NPT_XmlAccumulator::NPT_XmlAccumulator() :
604 m_Buffer(NULL),
605 m_Allocated(0),
606 m_Valid(0)
610 /*----------------------------------------------------------------------
611 | NPT_XmlAccumulator::~NPT_XmlAccumulator
612 +---------------------------------------------------------------------*/
613 NPT_XmlAccumulator::~NPT_XmlAccumulator()
615 delete[] m_Buffer;
618 /*----------------------------------------------------------------------
619 | NPT_XmlAccumulator::Allocate
620 +---------------------------------------------------------------------*/
621 void
622 NPT_XmlAccumulator::Allocate(NPT_Size size)
624 // check if we have enough
625 if (m_Allocated >= size) return;
627 // compute new size
628 do {
629 m_Allocated = m_Allocated ? m_Allocated * 2 : 32;
630 } while (m_Allocated < size);
632 // reallocate
633 unsigned char* new_buffer = new unsigned char[m_Allocated];
634 NPT_CopyMemory(new_buffer, m_Buffer, m_Valid);
635 delete[] m_Buffer;
636 m_Buffer = new_buffer;
639 /*----------------------------------------------------------------------
640 | NPT_XmlAccumulator::Append
641 +---------------------------------------------------------------------*/
642 inline void
643 NPT_XmlAccumulator::Append(char c)
645 NPT_Size needed = m_Valid+1;
646 if (needed > m_Allocated) Allocate(needed);
647 m_Buffer[m_Valid++] = c;
650 /*----------------------------------------------------------------------
651 | NPT_XmlAccumulator::Append
652 +---------------------------------------------------------------------*/
653 void
654 NPT_XmlAccumulator::Append(const char* s)
656 char c;
657 while ((c = *s++)) Append(c);
660 /*----------------------------------------------------------------------
661 | NPT_XmlAccumulator::AppendUTF8
662 +---------------------------------------------------------------------*/
663 inline void
664 NPT_XmlAccumulator::AppendUTF8(unsigned int c)
666 NPT_Size needed = m_Valid+4; // allocate 4 more chars
667 if (needed > m_Allocated) Allocate(needed);
669 if (c <= 0x7F) {
670 // 000000-00007F -> 1 char = 0xxxxxxx
671 m_Buffer[m_Valid++] = (char)c;
672 } else if (c <= 0x7FF) {
673 // 000080-0007FF -> 2 chars = 110zzzzx 10xxxxxx
674 m_Buffer[m_Valid++] = 0xC0|(c>>6 );
675 m_Buffer[m_Valid++] = 0x80|(c&0x3F);
676 } else if (c <= 0xFFFF) {
677 // 000800-00FFFF -> 3 chars = 1110zzzz 10zxxxxx 10xxxxxx
678 m_Buffer[m_Valid++] = 0xE0| (c>>12 );
679 m_Buffer[m_Valid++] = 0x80|((c&0xFC0)>>6);
680 m_Buffer[m_Valid++] = 0x80| (c&0x3F );
681 } else if (c <= 0x10FFFF) {
682 // 010000-10FFFF -> 4 chars = 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
683 m_Buffer[m_Valid++] = 0xF0| (c>>18 );
684 m_Buffer[m_Valid++] = 0x80|((c&0x3F000)>>12);
685 m_Buffer[m_Valid++] = 0x80|((c&0xFC0 )>> 6);
686 m_Buffer[m_Valid++] = 0x80| (c&0x3F );
690 /*----------------------------------------------------------------------
691 | NPT_XmlAccumulator::GetString
692 +---------------------------------------------------------------------*/
693 inline const char*
694 NPT_XmlAccumulator::GetString()
696 // ensure that the buffer is NULL terminated
697 Allocate(m_Valid+1);
698 m_Buffer[m_Valid] = '\0';
699 return (const char*)m_Buffer;
702 /*----------------------------------------------------------------------
703 | NPT_XmlNamespaceMap::~NPT_XmlNamespaceMap
704 +---------------------------------------------------------------------*/
705 NPT_XmlNamespaceMap::~NPT_XmlNamespaceMap()
707 m_Entries.Apply(NPT_ObjectDeleter<Entry>());
710 /*----------------------------------------------------------------------
711 | NPT_XmlNamespaceMap::SetNamespaceUri
712 +---------------------------------------------------------------------*/
713 NPT_Result
714 NPT_XmlNamespaceMap::SetNamespaceUri(const char* prefix, const char* uri)
716 NPT_List<Entry*>::Iterator item = m_Entries.GetFirstItem();
717 while (item) {
718 if ((*item)->m_Prefix == prefix) {
719 // the prefix is already in the map, update the value
720 (*item)->m_Uri = uri;
721 return NPT_SUCCESS;
723 ++item;
726 // the prefix is not in the map, add it
727 return m_Entries.Add(new Entry(prefix, uri));
730 /*----------------------------------------------------------------------
731 | NPT_XmlNamespaceMap::GetNamespaceUri
732 +---------------------------------------------------------------------*/
733 const NPT_String*
734 NPT_XmlNamespaceMap::GetNamespaceUri(const char* prefix)
736 NPT_List<Entry*>::Iterator item = m_Entries.GetFirstItem();
737 while (item) {
738 if ((*item)->m_Prefix == prefix) {
739 // match
740 return &(*item)->m_Uri;
742 ++item;
745 // the prefix is not in the map
746 return NULL;
749 /*----------------------------------------------------------------------
750 | NPT_XmlNamespaceMap::GetNamespacePrefix
751 +---------------------------------------------------------------------*/
752 const NPT_String*
753 NPT_XmlNamespaceMap::GetNamespacePrefix(const char* uri)
755 NPT_List<Entry*>::Iterator item = m_Entries.GetFirstItem();
756 while (item) {
757 if ((*item)->m_Uri == uri) {
758 // match
759 return &(*item)->m_Prefix;
761 ++item;
764 // the uri is not in the map
765 return NULL;
768 /*----------------------------------------------------------------------
769 | character map
771 | flags:
772 | 1 --> any char
773 | 2 --> whitespace
774 | 4 --> name
775 | 8 --> content
776 | 16 --> value
777 +---------------------------------------------------------------------*/
778 #define NPT_XML_USE_CHAR_MAP
779 #if defined(NPT_XML_USE_CHAR_MAP)
// Per-byte classification table, indexed by the byte value (0..255). Each
// entry is an OR of the flags listed in the header above:
//   1 = any char, 2 = whitespace, 4 = name char, 8 = content char,
//   16 = value char.
// Entries equal to 0 are bytes with no flag at all (the control characters
// other than tab, LF and CR). '&' (0x26) and '<' (0x3c) carry only the
// "any char" flag, so they are neither content nor value characters.
// Among the bytes >= 0xc0, only 0xd7 and 0xf7 lack the name flag.
780 // NOTE: this table is generated by the ruby script 'XmlCharMap.rb'
781 static const unsigned char NPT_XmlCharMap[256] = {
782 0, // 0 0x00
783 0, // 1 0x01
784 0, // 2 0x02
785 0, // 3 0x03
786 0, // 4 0x04
787 0, // 5 0x05
788 0, // 6 0x06
789 0, // 7 0x07
790 0, // 8 0x08
791 1|2|8|16, // 9 0x09
792 1|2|8|16, // 10 0x0a
793 0, // 11 0x0b
794 0, // 12 0x0c
795 1|2|8|16, // 13 0x0d
796 0, // 14 0x0e
797 0, // 15 0x0f
798 0, // 16 0x10
799 0, // 17 0x11
800 0, // 18 0x12
801 0, // 19 0x13
802 0, // 20 0x14
803 0, // 21 0x15
804 0, // 22 0x16
805 0, // 23 0x17
806 0, // 24 0x18
807 0, // 25 0x19
808 0, // 26 0x1a
809 0, // 27 0x1b
810 0, // 28 0x1c
811 0, // 29 0x1d
812 0, // 30 0x1e
813 0, // 31 0x1f
814 1|2|8|16, // 32 0x20 ' '
815 1|8|16, // 33 0x21 '!'
816 1|8|16, // 34 0x22 '"'
817 1|8|16, // 35 0x23 '#'
818 1|8|16, // 36 0x24 '$'
819 1|8|16, // 37 0x25 '%'
820 1, // 38 0x26 '&'
821 1|8|16, // 39 0x27 '''
822 1|8|16, // 40 0x28 '('
823 1|8|16, // 41 0x29 ')'
824 1|8|16, // 42 0x2a '*'
825 1|8|16, // 43 0x2b '+'
826 1|8|16, // 44 0x2c ','
827 1|4|8|16, // 45 0x2d '-'
828 1|4|8|16, // 46 0x2e '.'
829 1|8|16, // 47 0x2f '/'
830 1|4|8|16, // 48 0x30 '0'
831 1|4|8|16, // 49 0x31 '1'
832 1|4|8|16, // 50 0x32 '2'
833 1|4|8|16, // 51 0x33 '3'
834 1|4|8|16, // 52 0x34 '4'
835 1|4|8|16, // 53 0x35 '5'
836 1|4|8|16, // 54 0x36 '6'
837 1|4|8|16, // 55 0x37 '7'
838 1|4|8|16, // 56 0x38 '8'
839 1|4|8|16, // 57 0x39 '9'
840 1|4|8|16, // 58 0x3a ':'
841 1|8|16, // 59 0x3b ';'
842 1, // 60 0x3c '<'
843 1|8|16, // 61 0x3d '='
844 1|8|16, // 62 0x3e '>'
845 1|8|16, // 63 0x3f '?'
846 1|8|16, // 64 0x40 '@'
847 1|4|8|16, // 65 0x41 'A'
848 1|4|8|16, // 66 0x42 'B'
849 1|4|8|16, // 67 0x43 'C'
850 1|4|8|16, // 68 0x44 'D'
851 1|4|8|16, // 69 0x45 'E'
852 1|4|8|16, // 70 0x46 'F'
853 1|4|8|16, // 71 0x47 'G'
854 1|4|8|16, // 72 0x48 'H'
855 1|4|8|16, // 73 0x49 'I'
856 1|4|8|16, // 74 0x4a 'J'
857 1|4|8|16, // 75 0x4b 'K'
858 1|4|8|16, // 76 0x4c 'L'
859 1|4|8|16, // 77 0x4d 'M'
860 1|4|8|16, // 78 0x4e 'N'
861 1|4|8|16, // 79 0x4f 'O'
862 1|4|8|16, // 80 0x50 'P'
863 1|4|8|16, // 81 0x51 'Q'
864 1|4|8|16, // 82 0x52 'R'
865 1|4|8|16, // 83 0x53 'S'
866 1|4|8|16, // 84 0x54 'T'
867 1|4|8|16, // 85 0x55 'U'
868 1|4|8|16, // 86 0x56 'V'
869 1|4|8|16, // 87 0x57 'W'
870 1|4|8|16, // 88 0x58 'X'
871 1|4|8|16, // 89 0x59 'Y'
872 1|4|8|16, // 90 0x5a 'Z'
873 1|8|16, // 91 0x5b '['
874 1|8|16, // 92 0x5c '\'
875 1|8|16, // 93 0x5d ']'
876 1|8|16, // 94 0x5e '^'
877 1|4|8|16, // 95 0x5f '_'
878 1|8|16, // 96 0x60 '`'
879 1|4|8|16, // 97 0x61 'a'
880 1|4|8|16, // 98 0x62 'b'
881 1|4|8|16, // 99 0x63 'c'
882 1|4|8|16, // 100 0x64 'd'
883 1|4|8|16, // 101 0x65 'e'
884 1|4|8|16, // 102 0x66 'f'
885 1|4|8|16, // 103 0x67 'g'
886 1|4|8|16, // 104 0x68 'h'
887 1|4|8|16, // 105 0x69 'i'
888 1|4|8|16, // 106 0x6a 'j'
889 1|4|8|16, // 107 0x6b 'k'
890 1|4|8|16, // 108 0x6c 'l'
891 1|4|8|16, // 109 0x6d 'm'
892 1|4|8|16, // 110 0x6e 'n'
893 1|4|8|16, // 111 0x6f 'o'
894 1|4|8|16, // 112 0x70 'p'
895 1|4|8|16, // 113 0x71 'q'
896 1|4|8|16, // 114 0x72 'r'
897 1|4|8|16, // 115 0x73 's'
898 1|4|8|16, // 116 0x74 't'
899 1|4|8|16, // 117 0x75 'u'
900 1|4|8|16, // 118 0x76 'v'
901 1|4|8|16, // 119 0x77 'w'
902 1|4|8|16, // 120 0x78 'x'
903 1|4|8|16, // 121 0x79 'y'
904 1|4|8|16, // 122 0x7a 'z'
905 1|8|16, // 123 0x7b '{'
906 1|8|16, // 124 0x7c '|'
907 1|8|16, // 125 0x7d '}'
908 1|8|16, // 126 0x7e '~'
909 1|8|16, // 127 0x7f
910 1|8|16, // 128 0x80
911 1|8|16, // 129 0x81
912 1|8|16, // 130 0x82
913 1|8|16, // 131 0x83
914 1|8|16, // 132 0x84
915 1|8|16, // 133 0x85
916 1|8|16, // 134 0x86
917 1|8|16, // 135 0x87
918 1|8|16, // 136 0x88
919 1|8|16, // 137 0x89
920 1|8|16, // 138 0x8a
921 1|8|16, // 139 0x8b
922 1|8|16, // 140 0x8c
923 1|8|16, // 141 0x8d
924 1|8|16, // 142 0x8e
925 1|8|16, // 143 0x8f
926 1|8|16, // 144 0x90
927 1|8|16, // 145 0x91
928 1|8|16, // 146 0x92
929 1|8|16, // 147 0x93
930 1|8|16, // 148 0x94
931 1|8|16, // 149 0x95
932 1|8|16, // 150 0x96
933 1|8|16, // 151 0x97
934 1|8|16, // 152 0x98
935 1|8|16, // 153 0x99
936 1|8|16, // 154 0x9a
937 1|8|16, // 155 0x9b
938 1|8|16, // 156 0x9c
939 1|8|16, // 157 0x9d
940 1|8|16, // 158 0x9e
941 1|8|16, // 159 0x9f
942 1|8|16, // 160 0xa0
943 1|8|16, // 161 0xa1
944 1|8|16, // 162 0xa2
945 1|8|16, // 163 0xa3
946 1|8|16, // 164 0xa4
947 1|8|16, // 165 0xa5
948 1|8|16, // 166 0xa6
949 1|8|16, // 167 0xa7
950 1|8|16, // 168 0xa8
951 1|8|16, // 169 0xa9
952 1|8|16, // 170 0xaa
953 1|8|16, // 171 0xab
954 1|8|16, // 172 0xac
955 1|8|16, // 173 0xad
956 1|8|16, // 174 0xae
957 1|8|16, // 175 0xaf
958 1|8|16, // 176 0xb0
959 1|8|16, // 177 0xb1
960 1|8|16, // 178 0xb2
961 1|8|16, // 179 0xb3
962 1|8|16, // 180 0xb4
963 1|8|16, // 181 0xb5
964 1|8|16, // 182 0xb6
965 1|8|16, // 183 0xb7
966 1|8|16, // 184 0xb8
967 1|8|16, // 185 0xb9
968 1|8|16, // 186 0xba
969 1|8|16, // 187 0xbb
970 1|8|16, // 188 0xbc
971 1|8|16, // 189 0xbd
972 1|8|16, // 190 0xbe
973 1|8|16, // 191 0xbf
974 1|4|8|16, // 192 0xc0
975 1|4|8|16, // 193 0xc1
976 1|4|8|16, // 194 0xc2
977 1|4|8|16, // 195 0xc3
978 1|4|8|16, // 196 0xc4
979 1|4|8|16, // 197 0xc5
980 1|4|8|16, // 198 0xc6
981 1|4|8|16, // 199 0xc7
982 1|4|8|16, // 200 0xc8
983 1|4|8|16, // 201 0xc9
984 1|4|8|16, // 202 0xca
985 1|4|8|16, // 203 0xcb
986 1|4|8|16, // 204 0xcc
987 1|4|8|16, // 205 0xcd
988 1|4|8|16, // 206 0xce
989 1|4|8|16, // 207 0xcf
990 1|4|8|16, // 208 0xd0
991 1|4|8|16, // 209 0xd1
992 1|4|8|16, // 210 0xd2
993 1|4|8|16, // 211 0xd3
994 1|4|8|16, // 212 0xd4
995 1|4|8|16, // 213 0xd5
996 1|4|8|16, // 214 0xd6
997 1|8|16, // 215 0xd7
998 1|4|8|16, // 216 0xd8
999 1|4|8|16, // 217 0xd9
1000 1|4|8|16, // 218 0xda
1001 1|4|8|16, // 219 0xdb
1002 1|4|8|16, // 220 0xdc
1003 1|4|8|16, // 221 0xdd
1004 1|4|8|16, // 222 0xde
1005 1|4|8|16, // 223 0xdf
1006 1|4|8|16, // 224 0xe0
1007 1|4|8|16, // 225 0xe1
1008 1|4|8|16, // 226 0xe2
1009 1|4|8|16, // 227 0xe3
1010 1|4|8|16, // 228 0xe4
1011 1|4|8|16, // 229 0xe5
1012 1|4|8|16, // 230 0xe6
1013 1|4|8|16, // 231 0xe7
1014 1|4|8|16, // 232 0xe8
1015 1|4|8|16, // 233 0xe9
1016 1|4|8|16, // 234 0xea
1017 1|4|8|16, // 235 0xeb
1018 1|4|8|16, // 236 0xec
1019 1|4|8|16, // 237 0xed
1020 1|4|8|16, // 238 0xee
1021 1|4|8|16, // 239 0xef
1022 1|4|8|16, // 240 0xf0
1023 1|4|8|16, // 241 0xf1
1024 1|4|8|16, // 242 0xf2
1025 1|4|8|16, // 243 0xf3
1026 1|4|8|16, // 244 0xf4
1027 1|4|8|16, // 245 0xf5
1028 1|4|8|16, // 246 0xf6
1029 1|8|16, // 247 0xf7
1030 1|4|8|16, // 248 0xf8
1031 1|4|8|16, // 249 0xf9
1032 1|4|8|16, // 250 0xfa
1033 1|4|8|16, // 251 0xfb
1034 1|4|8|16, // 252 0xfc
1035 1|4|8|16, // 253 0xfd
1036 1|4|8|16, // 254 0xfe
1037 1|4|8|16 // 255 0xff
1039 #endif // defined(NPT_XML_USE_CHAR_MAP)
1041 /*----------------------------------------------------------------------
1042 | macros
1043 +---------------------------------------------------------------------*/
#if defined (NPT_XML_USE_CHAR_MAP)
// Table-driven classification: each NPT_XmlCharMap entry is a bit mask
// (1 = any char, 2 = whitespace, 4 = name char, 8 = content char,
// 16 = value char).
// The argument is converted to unsigned char before it is used as an index,
// so a plain (possibly signed) char holding a byte >= 0x80 cannot produce a
// negative index into the table.
#define NPT_XML_CHAR_IS_ANY_CHAR(c)        (NPT_XmlCharMap[(unsigned char)(c)] & 1)
#define NPT_XML_CHAR_IS_WHITESPACE(c)      (NPT_XmlCharMap[(unsigned char)(c)] & 2)
#define NPT_XML_CHAR_IS_NAME_CHAR(c)       (NPT_XmlCharMap[(unsigned char)(c)] & 4)
#define NPT_XML_CHAR_IS_ENTITY_REF_CHAR(c) (NPT_XML_CHAR_IS_NAME_CHAR((c)) || ((c) == '#'))
#define NPT_XML_CHAR_IS_CONTENT_CHAR(c)    (NPT_XmlCharMap[(unsigned char)(c)] & 8)
#define NPT_XML_CHAR_IS_VALUE_CHAR(c)      (NPT_XmlCharMap[(unsigned char)(c)] & 16)
#else
// Comparison-based classification, used when no character map is compiled in.

// space, tab, CR or LF
#define NPT_XML_CHAR_IS_WHITESPACE(c) \
    ((c) == ' ' || (c) == '\t' || (c) == 0x0D || (c) == 0x0A)

// whitespace, or anything at or above 0x20
#define NPT_XML_CHAR_IS_ANY_CHAR(c) \
    (NPT_XML_CHAR_IS_WHITESPACE((c)) || ((c) >= 0x20))

#define NPT_XML_CHAR_IS_DIGIT(c) \
    ((c) >= '0' && (c) <= '9')

// ASCII letters plus 0xC0..0xFF, excluding 0xD7 and 0xF7
#define NPT_XML_CHAR_IS_LETTER(c) \
    (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= 0xC0 && (c) <= 0xD6) || ((c) >= 0xD8 && (c) <= 0xF6) || ((c) >= 0xF8))

#define NPT_XML_CHAR_IS_NAME_CHAR(c) \
    (NPT_XML_CHAR_IS_DIGIT((c)) || NPT_XML_CHAR_IS_LETTER((c)) || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':')

// name characters plus '#', which introduces a numeric reference
#define NPT_XML_CHAR_IS_ENTITY_REF_CHAR(c) \
    (NPT_XML_CHAR_IS_NAME_CHAR((c)) || ((c) == '#'))

// any character except the markup delimiters '&' and '<'
#define NPT_XML_CHAR_IS_CONTENT_CHAR(c) \
    (NPT_XML_CHAR_IS_ANY_CHAR((c)) && ((c) != '&') && ((c) != '<'))

#define NPT_XML_CHAR_IS_VALUE_CHAR(c) \
    (NPT_XML_CHAR_IS_ANY_CHAR((c)) && ((c) != '&') && ((c) != '<'))

#endif // defined(NPT_XML_USE_CHAR_MAP)
1078 /*----------------------------------------------------------------------
1079 | NPT_XmlStringIsWhitespace
1080 +---------------------------------------------------------------------*/
1081 static bool
1082 NPT_XmlStringIsWhitespace(const char* s, NPT_Size size)
1084 for (NPT_Size x=0; x<size; x++) {
1085 if (!NPT_XML_CHAR_IS_WHITESPACE((int)s[x])) {
1086 return false;
1090 return true;
1093 /*----------------------------------------------------------------------
1094 | NPT_XmlProcessor class
1095 +---------------------------------------------------------------------*/
1096 class NPT_XmlProcessor {
1097 public:
1098 // constructor and destructor
1099 NPT_XmlProcessor(NPT_XmlParser* parser);
1101 // methods
1102 NPT_Result ProcessBuffer(const char* buffer, NPT_Size size);
1103 void Reset();
1105 private:
1106 // types
1107 typedef enum {
1108 CONTEXT_NONE,
1109 CONTEXT_OPEN_TAG,
1110 CONTEXT_CLOSE_TAG,
1111 CONTEXT_ATTRIBUTE,
1112 CONTEXT_VALUE_SINGLE_QUOTE,
1113 CONTEXT_VALUE_DOUBLE_QUOTE
1114 } Context;
1116 typedef enum {
1117 STATE_IN_INIT,
1118 STATE_IN_BOM_EF,
1119 STATE_IN_BOM_BB,
1120 STATE_IN_WHITESPACE,
1121 STATE_IN_NAME,
1122 STATE_IN_NAME_SPECIAL,
1123 STATE_IN_VALUE_START,
1124 STATE_IN_VALUE,
1125 STATE_IN_TAG_START,
1126 STATE_IN_EMPTY_TAG_END,
1127 STATE_IN_CONTENT,
1128 STATE_IN_PROCESSING_INSTRUCTION_START,
1129 STATE_IN_PROCESSING_INSTRUCTION,
1130 STATE_IN_PROCESSING_INSTRUCTION_END,
1131 STATE_IN_COMMENT,
1132 STATE_IN_COMMENT_END_1,
1133 STATE_IN_COMMENT_END_2,
1134 STATE_IN_DTD,
1135 STATE_IN_DTD_MARKUP_DECL,
1136 STATE_IN_DTD_MARKUP_DECL_END,
1137 STATE_IN_CDATA,
1138 STATE_IN_CDATA_END_1,
1139 STATE_IN_CDATA_END_2,
1140 STATE_IN_SPECIAL,
1141 STATE_IN_ENTITY_REF
1142 } State;
1144 // members
1145 NPT_XmlParser* m_Parser;
1146 State m_State;
1147 Context m_Context;
1148 bool m_SkipNewline;
1149 NPT_XmlAccumulator m_Name;
1150 NPT_XmlAccumulator m_Value;
1151 NPT_XmlAccumulator m_Text;
1152 NPT_XmlAccumulator m_Entity;
1154 // methods
1155 #ifdef NPT_XML_PARSER_DEBUG
1156 const char* StateName(State state) {
1157 switch (state) {
1158 case STATE_IN_INIT: return "IN_INIT";
1159 case STATE_IN_BOM_EF: return "IN_BOM_EF";
1160 case STATE_IN_BOM_BB: return "IN_BOM_BB";
1161 case STATE_IN_WHITESPACE: return "IN_WHITESPACE";
1162 case STATE_IN_NAME: return "IN_NAME";
1163 case STATE_IN_NAME_SPECIAL: return "IN_NAME_SPECIAL";
1164 case STATE_IN_VALUE_START: return "IN_VALUE_START";
1165 case STATE_IN_VALUE: return "IN_VALUE";
1166 case STATE_IN_TAG_START: return "IN_TAG_START";
1167 case STATE_IN_EMPTY_TAG_END: return "IN_EMPTY_TAG_END";
1168 case STATE_IN_CONTENT: return "IN_CONTENT";
1169 case STATE_IN_PROCESSING_INSTRUCTION_START: return "IN_PROCESSING_INSTRUCTION_START";
1170 case STATE_IN_PROCESSING_INSTRUCTION: return "IN_PROCESSING_INSTRUCTION";
1171 case STATE_IN_PROCESSING_INSTRUCTION_END: return "IN_PROCESSING_INSTRUCTION_END";
1172 case STATE_IN_COMMENT: return "IN_COMMENT";
1173 case STATE_IN_COMMENT_END_1: return "IN_COMMENT_END_1";
1174 case STATE_IN_COMMENT_END_2: return "IN_COMMENT_END_2";
1175 case STATE_IN_DTD: return "IN_DTD";
1176 case STATE_IN_DTD_MARKUP_DECL: return "IN_DTD_MARKUP_DECL";
1177 case STATE_IN_DTD_MARKUP_DECL_END: return "IN_DTD_MARKUP_DECL_END";
1178 case STATE_IN_CDATA: return "IN_CDATA";
1179 case STATE_IN_CDATA_END_1: return "IN_CDATA_END_1";
1180 case STATE_IN_CDATA_END_2: return "IN_CDATA_END_2";
1181 case STATE_IN_SPECIAL: return "IN_SPECIAL";
1182 case STATE_IN_ENTITY_REF: return "IN_ENTITY_REF";
1184 return "UNKNOWN";
1187 const char* ContextName(Context context) {
1188 switch (context) {
1189 case CONTEXT_NONE: return "NONE";
1190 case CONTEXT_OPEN_TAG: return "OPEN_TAG";
1191 case CONTEXT_CLOSE_TAG: return "CLOSE_TAG";
1192 case CONTEXT_ATTRIBUTE: return "ATTRIBUTE";
1193 case CONTEXT_VALUE_SINGLE_QUOTE: return "VALUE_SINGLE_QUOTE";
1194 case CONTEXT_VALUE_DOUBLE_QUOTE: return "VALUE_DOUBLE_QUOTE";
1196 return "UNKNOWN";
1198 #endif /* NPT_XML_PARSER_DEBUG */
1200 inline void SetState(State state) {
1201 NPT_XML_Debug_3("\nstate transition: %s to %s [ctx=%s]\n",
1202 StateName(m_State),
1203 StateName(state),
1204 ContextName(m_Context));
1205 m_State = state;
1208 inline void SetState(State state, Context context) {
1209 NPT_XML_Debug_4("\nstate transition: %s [ctx=%s] to %s [ctx=%s]\n",
1210 StateName(m_State),
1211 ContextName(m_Context),
1212 StateName(state),
1213 ContextName(context));
1214 m_State = state;
1215 m_Context = context;
1218 NPT_Result ResolveEntity(NPT_XmlAccumulator& source,
1219 NPT_XmlAccumulator& destination);
1220 NPT_Result FlushPendingText();
1223 /*----------------------------------------------------------------------
1224 | NPT_XmlProcessor::NPT_XmlProcessor
1225 +---------------------------------------------------------------------*/
1226 NPT_XmlProcessor::NPT_XmlProcessor(NPT_XmlParser* parser) :
1227 m_Parser(parser),
1228 m_State(STATE_IN_INIT),
1229 m_Context(CONTEXT_NONE),
1230 m_SkipNewline(false)
1234 /*----------------------------------------------------------------------
1235 | NPT_XmlProcessor::Reset
1236 +---------------------------------------------------------------------*/
1237 void
1238 NPT_XmlProcessor::Reset()
1240 m_State = STATE_IN_INIT;
1241 m_Context = CONTEXT_NONE;
1242 m_SkipNewline = false;
1245 /*----------------------------------------------------------------------
1246 | NPT_XmlProcessor::ResolveEntity
1247 +---------------------------------------------------------------------*/
1248 NPT_Result
1249 NPT_XmlProcessor::ResolveEntity(NPT_XmlAccumulator& source,
1250 NPT_XmlAccumulator& destination)
1252 const char* entity = (const char*)source.GetString();
1254 if (NPT_StringsEqual(entity, "lt")) {
1255 destination.Append('<');
1256 } else if (NPT_StringsEqual(entity, "gt")) {
1257 destination.Append('>');
1258 } else if (NPT_StringsEqual(entity, "amp")) {
1259 destination.Append('&');
1260 } else if (NPT_StringsEqual(entity, "quot")) {
1261 destination.Append('"');
1262 } else if (NPT_StringsEqual(entity, "apos")) {
1263 destination.Append('\'');
1264 } else if (entity[0] == '#') {
1265 int i=1;
1266 int base = 10;
1267 if (entity[1] == 'x') {
1268 i++;
1269 base = 16;
1271 int parsed = 0;
1272 while (char c = entity[i++]) {
1273 int digit = -1;
1274 if (c>='0' && c<='9') {
1275 digit = c-'0';
1276 } else if (base == 16) {
1277 if (c >= 'a' && c <= 'f') {
1278 digit = 10+c-'a';
1279 } else if (c >= 'A' && c <= 'F') {
1280 digit = 10+c-'A';
1283 if (digit == -1) {
1284 // invalid char, leave the entity unparsed
1285 destination.Append(source.GetString());
1286 return NPT_ERROR_INVALID_SYNTAX;
1288 parsed = base*parsed+digit;
1290 destination.AppendUTF8(parsed);
1291 } else {
1292 // unknown entity, leave as-is
1293 destination.Append(source.GetString());
1296 return NPT_SUCCESS;
1299 /*----------------------------------------------------------------------
1300 | NPT_XmlProcessor::FlushPendingText
1301 +---------------------------------------------------------------------*/
1302 NPT_Result
1303 NPT_XmlProcessor::FlushPendingText()
1305 if (m_Text.GetSize() > 0) {
1306 NPT_CHECK(m_Parser->OnCharacterData(m_Text.GetString(),
1307 m_Text.GetSize()));
1308 m_Text.Reset();
1310 return NPT_SUCCESS;
1313 /*----------------------------------------------------------------------
1314 | NPT_XmlProcessor::ProcessBuffer
1315 +---------------------------------------------------------------------*/
1316 NPT_Result
1317 NPT_XmlProcessor::ProcessBuffer(const char* buffer, NPT_Size size)
1319 unsigned char c;
1321 while (size-- && (c = *buffer++)) {
1322 NPT_XML_Debug_1("[%c]", (c == '\n' || c == '\r') ? '#' : c);
1324 // normalize line ends
1325 if (m_SkipNewline) {
1326 m_SkipNewline = false;
1327 if (c == '\n') continue;
1329 if (c == '\r') {
1330 m_SkipNewline = true;
1331 c = '\n';
1334 // process the character
1335 switch (m_State) {
1336 case STATE_IN_INIT:
1337 if (NPT_XML_CHAR_IS_WHITESPACE(c)) {
1338 SetState(STATE_IN_WHITESPACE);
1339 break;
1340 } else if (c == '<') {
1341 SetState(STATE_IN_TAG_START);
1342 break;
1343 } else if (c == 0xEF) {
1344 SetState(STATE_IN_BOM_EF);
1345 break;
1347 return NPT_ERROR_INVALID_SYNTAX;
1349 case STATE_IN_BOM_EF:
1350 if (c == 0xBB) {
1351 SetState(STATE_IN_BOM_BB);
1352 break;
1354 return NPT_ERROR_INVALID_SYNTAX;
1356 case STATE_IN_BOM_BB:
1357 if (c == 0xBF) {
1358 SetState(STATE_IN_WHITESPACE);
1359 break;
1361 return NPT_ERROR_INVALID_SYNTAX;
1363 case STATE_IN_WHITESPACE:
1364 if (NPT_XML_CHAR_IS_WHITESPACE(c)) break;
1365 switch (m_Context) {
1366 case CONTEXT_NONE:
1367 if (c == '<') {
1368 SetState(STATE_IN_TAG_START);
1369 } else {
1370 return NPT_ERROR_INVALID_SYNTAX;
1372 break;
1374 case CONTEXT_ATTRIBUTE:
1375 if (c == '/') {
1376 SetState(STATE_IN_EMPTY_TAG_END, CONTEXT_NONE);
1377 } else if (c == '>') {
1378 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1379 } else if (NPT_XML_CHAR_IS_NAME_CHAR(c)) {
1380 m_Name.Reset();
1381 m_Name.Append(c);
1382 SetState(STATE_IN_NAME);
1383 } else {
1384 return NPT_ERROR_INVALID_SYNTAX;
1386 break;
1388 case CONTEXT_CLOSE_TAG:
1389 if (c == '>') {
1390 NPT_CHECK(FlushPendingText());
1391 NPT_CHECK(m_Parser->OnEndElement(m_Name.GetString()));
1392 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1393 } else {
1394 return NPT_ERROR_INVALID_SYNTAX;
1396 break;
1398 default:
1399 return NPT_ERROR_INVALID_SYNTAX;
1401 break;
1403 case STATE_IN_NAME:
1404 if (NPT_XML_CHAR_IS_NAME_CHAR(c)) {
1405 m_Name.Append(c);
1406 break;
1408 switch (m_Context) {
1409 case CONTEXT_ATTRIBUTE:
1410 if (c == '=') {
1411 m_Value.Reset();
1412 SetState(STATE_IN_VALUE_START);
1413 } else if (!NPT_XML_CHAR_IS_WHITESPACE(c)) {
1414 return NPT_ERROR_INVALID_SYNTAX;
1416 break;
1418 case CONTEXT_OPEN_TAG:
1419 if (c == '>' || c == '/' || NPT_XML_CHAR_IS_WHITESPACE(c)) {
1420 NPT_CHECK(FlushPendingText());
1421 NPT_CHECK(m_Parser->OnStartElement(m_Name.GetString()));
1422 m_Name.Reset();
1423 if (c == '>') {
1424 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1425 } else if (c == '/') {
1426 SetState(STATE_IN_EMPTY_TAG_END);
1427 } else {
1428 SetState(STATE_IN_WHITESPACE, CONTEXT_ATTRIBUTE);
1430 } else {
1431 return NPT_ERROR_INVALID_SYNTAX;
1433 break;
1435 case CONTEXT_CLOSE_TAG:
1436 if (c == '>') {
1437 NPT_CHECK(FlushPendingText());
1438 NPT_CHECK(m_Parser->OnEndElement(m_Name.GetString()));
1439 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1440 } else if (NPT_XML_CHAR_IS_WHITESPACE(c)) {
1441 SetState(STATE_IN_WHITESPACE);
1442 } else {
1443 return NPT_ERROR_INVALID_SYNTAX;
1445 break;
1447 default:
1448 return NPT_ERROR_INVALID_SYNTAX;
1450 break;
1452 case STATE_IN_NAME_SPECIAL:
1453 if (NPT_XML_CHAR_IS_NAME_CHAR(c) || (c == '[')) {
1454 m_Name.Append(c);
1456 const unsigned char* nb = m_Name.GetBuffer();
1457 if (m_Name.GetSize() == 2) {
1458 if (nb[0] == '-' &&
1459 nb[1] == '-') {
1460 m_Name.Reset();
1461 SetState(STATE_IN_COMMENT, CONTEXT_NONE);
1462 break;
1464 } else if (m_Name.GetSize() == 7) {
1465 if (nb[0] == '[' &&
1466 nb[1] == 'C' &&
1467 nb[2] == 'D' &&
1468 nb[3] == 'A' &&
1469 nb[4] == 'T' &&
1470 nb[5] == 'A' &&
1471 nb[6] == '[') {
1472 m_Name.Reset();
1473 SetState(STATE_IN_CDATA, CONTEXT_NONE);
1474 break;
1477 break;
1479 if (NPT_XML_CHAR_IS_WHITESPACE(c)) {
1480 const char* special = m_Name.GetString();
1481 if (special && NPT_StringsEqual(special, "DOCTYPE")) {
1482 SetState(STATE_IN_DTD, CONTEXT_NONE);
1483 } else {
1484 SetState(STATE_IN_SPECIAL, CONTEXT_NONE);
1486 m_Name.Reset();
1487 } else {
1488 return NPT_ERROR_INVALID_SYNTAX;
1490 break;
1492 case STATE_IN_VALUE_START:
1493 if (NPT_XML_CHAR_IS_WHITESPACE(c)) break;
1494 if (c == '"') {
1495 m_Value.Reset();
1496 SetState(STATE_IN_VALUE, CONTEXT_VALUE_DOUBLE_QUOTE);
1497 } else if (c == '\'') {
1498 m_Value.Reset();
1499 SetState(STATE_IN_VALUE, CONTEXT_VALUE_SINGLE_QUOTE);
1500 } else {
1501 return NPT_ERROR_INVALID_SYNTAX;
1503 break;
1505 case STATE_IN_VALUE:
1506 if ((c == '"' && m_Context == CONTEXT_VALUE_DOUBLE_QUOTE) ||
1507 (c == '\'' && m_Context == CONTEXT_VALUE_SINGLE_QUOTE)) {
1508 NPT_CHECK(m_Parser->OnElementAttribute(m_Name.GetString(),
1509 m_Value.GetString()));
1510 SetState(STATE_IN_WHITESPACE, CONTEXT_ATTRIBUTE);
1511 } else if (c == '&') {
1512 m_Entity.Reset();
1513 SetState(STATE_IN_ENTITY_REF);
1514 } else if (NPT_XML_CHAR_IS_WHITESPACE(c)) {
1515 m_Value.Append(' ');
1516 } else if (NPT_XML_CHAR_IS_VALUE_CHAR(c)) {
1517 m_Value.Append(c);
1518 } else {
1519 return NPT_ERROR_INVALID_SYNTAX;
1521 break;
1523 case STATE_IN_TAG_START:
1524 m_Name.Reset();
1525 if (c == '!') {
1526 SetState(STATE_IN_NAME_SPECIAL, CONTEXT_NONE);
1527 } else if (c == '?') {
1528 SetState(STATE_IN_PROCESSING_INSTRUCTION, CONTEXT_NONE);
1529 } else if (c == '/') {
1530 SetState(STATE_IN_NAME, CONTEXT_CLOSE_TAG);
1531 } else if (NPT_XML_CHAR_IS_NAME_CHAR(c)) {
1532 m_Name.Append(c);
1533 SetState(STATE_IN_NAME, CONTEXT_OPEN_TAG);
1534 } else {
1535 return NPT_ERROR_INVALID_SYNTAX;
1537 break;
1539 case STATE_IN_EMPTY_TAG_END:
1540 if (c == '>') {
1541 NPT_CHECK(FlushPendingText());
1542 NPT_CHECK(m_Parser->OnEndElement(NULL));
1543 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1544 } else {
1545 return NPT_ERROR_INVALID_SYNTAX;
1547 break;
1549 case STATE_IN_ENTITY_REF:
1550 switch (m_Context) {
1551 case CONTEXT_VALUE_SINGLE_QUOTE:
1552 case CONTEXT_VALUE_DOUBLE_QUOTE:
1553 if (c == ';') {
1554 NPT_CHECK(ResolveEntity(m_Entity, m_Value));
1555 SetState(STATE_IN_VALUE);
1556 } else if (NPT_XML_CHAR_IS_ENTITY_REF_CHAR(c)) {
1557 m_Entity.Append(c);
1558 } else {
1559 return NPT_ERROR_INVALID_SYNTAX;
1561 break;
1563 case CONTEXT_NONE:
1564 if (c == ';') {
1565 NPT_CHECK(ResolveEntity(m_Entity, m_Text));
1566 SetState(STATE_IN_CONTENT);
1567 } else if (NPT_XML_CHAR_IS_ENTITY_REF_CHAR(c)) {
1568 m_Entity.Append(c);
1569 } else {
1570 return NPT_ERROR_INVALID_SYNTAX;
1572 break;
1574 default:
1575 return NPT_ERROR_INVALID_SYNTAX;
1577 break;
1579 case STATE_IN_COMMENT:
1580 if (c == '-') {
1581 SetState(STATE_IN_COMMENT_END_1);
1582 } else if (!NPT_XML_CHAR_IS_ANY_CHAR(c)) {
1583 return NPT_ERROR_INVALID_SYNTAX;
1585 break;
1587 case STATE_IN_COMMENT_END_1:
1588 if (c == '-') {
1589 SetState(STATE_IN_COMMENT_END_2);
1590 } else if (NPT_XML_CHAR_IS_ANY_CHAR(c)) {
1591 SetState(STATE_IN_COMMENT);
1592 } else {
1593 return NPT_ERROR_INVALID_SYNTAX;
1595 break;
1597 case STATE_IN_COMMENT_END_2:
1598 if (c == '>') {
1599 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1600 } else {
1601 return NPT_ERROR_INVALID_SYNTAX;
1603 break;
1605 case STATE_IN_CONTENT:
1606 if (c == '<') {
1607 SetState(STATE_IN_TAG_START, CONTEXT_NONE);
1608 } else if (c == '&') {
1609 m_Entity.Reset();
1610 SetState(STATE_IN_ENTITY_REF);
1611 } else {
1612 m_Text.Append(c);
1614 break;
1616 case STATE_IN_PROCESSING_INSTRUCTION_START:
1617 break;
1619 case STATE_IN_PROCESSING_INSTRUCTION_END:
1620 if (c == '>') {
1621 SetState(STATE_IN_WHITESPACE, CONTEXT_NONE);
1622 } else {
1623 return NPT_ERROR_INVALID_SYNTAX;
1625 break;
1627 case STATE_IN_PROCESSING_INSTRUCTION:
1628 if (c == '?') {
1629 SetState(STATE_IN_PROCESSING_INSTRUCTION_END);
1631 break;
1633 case STATE_IN_DTD:
1634 if (NPT_XML_CHAR_IS_WHITESPACE(c)) break;
1635 if (c == '[') {
1636 SetState(STATE_IN_DTD_MARKUP_DECL);
1637 } else if (c == '>') {
1638 SetState(STATE_IN_WHITESPACE, CONTEXT_NONE);
1640 break;
1642 case STATE_IN_DTD_MARKUP_DECL:
1643 if (c == ']') {
1644 SetState(STATE_IN_DTD_MARKUP_DECL_END);
1646 break;
1648 case STATE_IN_DTD_MARKUP_DECL_END:
1649 if (c == '>') {
1650 SetState(STATE_IN_WHITESPACE, CONTEXT_NONE);
1651 } else if (!NPT_XML_CHAR_IS_WHITESPACE(c)) {
1652 return NPT_ERROR_INVALID_SYNTAX;
1654 break;
1656 case STATE_IN_CDATA:
1657 if (c == ']') {
1658 SetState(STATE_IN_CDATA_END_1);
1659 } else {
1660 m_Text.Append(c);
1662 break;
1664 case STATE_IN_CDATA_END_1:
1665 if (c == ']') {
1666 SetState(STATE_IN_CDATA_END_2);
1667 } else {
1668 m_Text.Append(']');
1669 m_Text.Append(c);
1670 SetState(STATE_IN_CDATA);
1672 break;
1674 case STATE_IN_CDATA_END_2:
1675 if (c == '>') {
1676 SetState(STATE_IN_CONTENT, CONTEXT_NONE);
1677 } else {
1678 m_Text.Append("]]");
1679 m_Text.Append(c);
1680 SetState(STATE_IN_CDATA);
1682 break;
1684 case STATE_IN_SPECIAL:
1685 if (c == '>') {
1686 SetState(STATE_IN_WHITESPACE, CONTEXT_NONE);
1688 break;
1692 return NPT_SUCCESS;
1695 /*----------------------------------------------------------------------
1696 | NPT_XmlParser::NPT_XmlParser
1697 +---------------------------------------------------------------------*/
1698 NPT_XmlParser::NPT_XmlParser(bool keep_whitespace /* = false */) :
1699 m_Root(NULL),
1700 m_CurrentElement(NULL),
1701 m_KeepWhitespace(keep_whitespace)
1703 m_Processor = new NPT_XmlProcessor(this);
1706 /*----------------------------------------------------------------------
1707 | NPT_XmlParser::~NPT_XmlParser
1708 +---------------------------------------------------------------------*/
1709 NPT_XmlParser::~NPT_XmlParser()
1711 Reset();
1712 delete m_CurrentElement;
1713 delete m_Processor;
1716 /*----------------------------------------------------------------------
1717 | NPT_XmlParser::Reset
1718 +---------------------------------------------------------------------*/
1719 void
1720 NPT_XmlParser::Reset()
1722 // delete anything that has been created
1723 NPT_XmlNode* walker = m_CurrentElement;
1724 while (walker && walker->GetParent()) {
1725 walker = walker->GetParent();
1727 delete walker;
1728 m_CurrentElement = NULL;
1730 m_Processor->Reset();
1732 m_Root = NULL;
1735 /*----------------------------------------------------------------------
1736 | NPT_XmlParser::Parse
1737 +---------------------------------------------------------------------*/
1738 NPT_Result
1739 NPT_XmlParser::Parse(NPT_InputStream& stream,
1740 NPT_Size& size,
1741 NPT_XmlNode*& node,
1742 bool incremental /* = false */)
1744 NPT_Result result;
1746 // start with a known state
1747 m_Root = NULL;
1748 node = NULL;
1749 if (!incremental) {
1750 Reset();
1753 // use a buffer on the stack
1754 char buffer[1024];
1756 // read a buffer and parse it until the end of the stream
1757 NPT_Size max_bytes_to_read = size;
1758 size = 0;
1759 do {
1760 NPT_Size bytes_read;
1761 NPT_Size bytes_to_read = sizeof(buffer);
1762 if (max_bytes_to_read != 0 &&
1763 size+bytes_to_read > max_bytes_to_read) {
1764 bytes_to_read = max_bytes_to_read-size;
1766 result = stream.Read(buffer, bytes_to_read, &bytes_read);
1767 if (NPT_SUCCEEDED(result)) {
1768 // update the counter
1769 size += bytes_read;
1771 // parse the buffer
1772 result = m_Processor->ProcessBuffer(buffer, bytes_read);
1773 if (NPT_FAILED(result)) break;
1774 } else {
1775 break;
1777 } while(NPT_SUCCEEDED(result) &&
1778 (max_bytes_to_read == 0 || size < max_bytes_to_read));
1780 // return a tree if we have one
1781 node = m_Root;
1782 if (incremental) {
1783 return result;
1784 } else {
1785 if (NPT_FAILED(result) && result != NPT_ERROR_EOS) {
1786 delete m_Root;
1787 m_Root = NULL;
1788 node = NULL;
1789 return result;
1790 } else {
1791 return m_Root?NPT_SUCCESS:NPT_ERROR_XML_NO_ROOT;
1796 /*----------------------------------------------------------------------
1797 | NPT_XmlParser::Parse
1798 +---------------------------------------------------------------------*/
1799 NPT_Result
1800 NPT_XmlParser::Parse(NPT_InputStream& stream,
1801 NPT_XmlNode*& node,
1802 bool incremental /* = false */)
1804 NPT_Size max_read = 0; // no limit
1805 return Parse(stream, max_read, node, incremental);
1808 /*----------------------------------------------------------------------
1809 | NPT_XmlParser::Parse
1810 +---------------------------------------------------------------------*/
1811 NPT_Result
1812 NPT_XmlParser::Parse(const char* xml,
1813 NPT_XmlNode*& node,
1814 bool incremental /* = false */)
1816 NPT_Size size = NPT_StringLength(xml);
1818 return Parse(xml, size, node, incremental);
1821 /*----------------------------------------------------------------------
1822 | NPT_XmlParser::Parse
1823 +---------------------------------------------------------------------*/
1824 NPT_Result
1825 NPT_XmlParser::Parse(const char* xml,
1826 NPT_Size size,
1827 NPT_XmlNode*& node,
1828 bool incremental /* = false */)
1830 // start with a known state
1831 m_Root = NULL;
1832 node = NULL;
1833 if (!incremental) {
1834 Reset();
1837 // parse the buffer
1838 NPT_Result result = m_Processor->ProcessBuffer(xml, size);
1840 // return a tree if we have one
1841 node = m_Root;
1842 if (incremental) {
1843 return result;
1844 } else {
1845 if (NPT_FAILED(result)) {
1846 delete m_Root;
1847 m_Root = NULL;
1848 node = NULL;
1849 return result;
1850 } else {
1851 return m_Root?NPT_SUCCESS:NPT_ERROR_XML_NO_ROOT;
1856 /*----------------------------------------------------------------------
1857 | NPT_XmlParser::OnStartElement
1858 +---------------------------------------------------------------------*/
1859 NPT_Result
1860 NPT_XmlParser::OnStartElement(const char* name)
1862 NPT_XML_Debug_1("\nNPT_XmlParser::OnStartElement: %s\n", name);
1864 // we cannot start an element if we already have a root
1865 if (m_Root) {
1866 return NPT_ERROR_XML_MULTIPLE_ROOTS;
1869 // create new node
1870 NPT_XmlElementNode* node = new NPT_XmlElementNode(name);
1872 // add node to tree
1873 if (m_CurrentElement) {
1874 // add the new node
1875 m_CurrentElement->AddChild(node);
1877 m_CurrentElement = node;
1879 return NPT_SUCCESS;
1882 /*----------------------------------------------------------------------
1883 | NPT_XmlParser::OnElementAttribute
1884 +---------------------------------------------------------------------*/
1885 NPT_Result
1886 NPT_XmlParser::OnElementAttribute(const char* name, const char* value)
1888 NPT_XML_Debug_2("\nNPT_XmlParser::OnElementAttribute: name=%s, value='%s'\n",
1889 name, value);
1891 if (m_CurrentElement == NULL) {
1892 return NPT_ERROR_INVALID_SYNTAX;
1895 // check if this is a namespace attribute
1896 if (name[0] == 'x' &&
1897 name[1] == 'm' &&
1898 name[2] == 'l' &&
1899 name[3] == 'n' &&
1900 name[4] == 's' &&
1901 (name[5] == '\0' || name[5] == ':')) {
1902 // namespace definition
1903 m_CurrentElement->SetNamespaceUri((name[5] == ':')?name+6:"", value);
1904 } else {
1905 m_CurrentElement->AddAttribute(name, value);
1908 return NPT_SUCCESS;
1911 /*----------------------------------------------------------------------
1912 | NPT_XmlParser::OnEndElement
1913 +---------------------------------------------------------------------*/
1914 NPT_Result
1915 NPT_XmlParser::OnEndElement(const char* name)
1917 NPT_XML_Debug_1("\nNPT_XmlParser::OnEndElement: %s\n", name ? name : "NULL");
1919 if (m_CurrentElement == NULL) return NPT_ERROR_XML_TAG_MISMATCH;
1921 // check that the name matches (if there is a name)
1922 if (name) {
1923 const char* prefix = name;
1924 unsigned int prefix_length = 0;
1925 const char* tag = name;
1926 const char* cursor = name;
1927 while (char c = *cursor++) {
1928 if (c == ':') {
1929 prefix_length = (unsigned int)(cursor-name)-1;
1930 tag = cursor;
1933 // check that the name and prefix length match
1934 if (m_CurrentElement->GetTag() != tag ||
1935 m_CurrentElement->GetPrefix().GetLength() != prefix_length) {
1936 return NPT_ERROR_XML_TAG_MISMATCH;
1939 // check the prefix
1940 const char* current_prefix = m_CurrentElement->GetPrefix().GetChars();
1941 for (unsigned int i=0; i<prefix_length; i++) {
1942 if (current_prefix[i] != prefix[i]) {
1943 return NPT_ERROR_XML_TAG_MISMATCH;
1948 // pop up the stack
1949 NPT_XmlNode* parent = m_CurrentElement->GetParent();
1950 if (parent) {
1951 m_CurrentElement = parent->AsElementNode();
1952 } else {
1953 if (m_Root) {
1954 // this should never happen
1955 delete m_CurrentElement;
1956 m_CurrentElement = NULL;
1957 return NPT_ERROR_XML_MULTIPLE_ROOTS;
1958 } else {
1959 m_Root = m_CurrentElement;
1960 m_CurrentElement = NULL;
1964 return NPT_SUCCESS;
1967 /*----------------------------------------------------------------------
1968 | NPT_XmlParser::OnCharacterData
1969 +---------------------------------------------------------------------*/
1970 NPT_Result
1971 NPT_XmlParser::OnCharacterData(const char* data, NPT_Size size)
1973 NPT_XML_Debug_1("\nNPT_XmlParser::OnCharacterData: %s\n", data);
1975 // check that we have a current element
1976 if (m_CurrentElement == NULL) {
1977 // we do not allow non-whitespace outside an element content
1978 if (!NPT_XmlStringIsWhitespace(data, size)) {
1979 return NPT_ERROR_XML_INVALID_NESTING;
1982 // ignore whitespace
1983 return NPT_SUCCESS;
1986 // ignore whitespace if applicable
1987 if (m_KeepWhitespace || !NPT_XmlStringIsWhitespace(data, size)) {
1988 // add the text to the current element
1989 m_CurrentElement->AddText(data);
1992 return NPT_SUCCESS;
1995 /*----------------------------------------------------------------------
1996 | NPT_XmlAttributeWriter
1997 +---------------------------------------------------------------------*/
1998 class NPT_XmlAttributeWriter
2000 public:
2001 NPT_XmlAttributeWriter(NPT_XmlSerializer& serializer) : m_Serializer(serializer) {}
2002 void operator()(NPT_XmlAttribute*& attribute) const {
2003 m_Serializer.Attribute(attribute->GetPrefix(),
2004 attribute->GetName(),
2005 attribute->GetValue());
2008 private:
2009 // members
2010 NPT_XmlSerializer& m_Serializer;
2013 /*----------------------------------------------------------------------
2014 | NPT_XmlNodeWriter
2015 +---------------------------------------------------------------------*/
2016 class NPT_XmlNodeWriter
2018 public:
2019 NPT_XmlNodeWriter(NPT_XmlSerializer& serializer) :
2020 m_Serializer(serializer), m_AttributeWriter(serializer) {
2021 m_Serializer.StartDocument();
2023 void operator()(NPT_XmlNode*& node) const {
2024 if (NPT_XmlElementNode* element = node->AsElementNode()) {
2025 const NPT_String& prefix = element->GetPrefix();
2026 const NPT_String& tag = element->GetTag();
2027 m_Serializer.StartElement(prefix, tag);
2028 element->GetAttributes().Apply(m_AttributeWriter);
2030 // emit namespace attributes
2031 if (element->m_NamespaceMap) {
2032 NPT_List<NPT_XmlNamespaceMap::Entry*>::Iterator item =
2033 element->m_NamespaceMap->m_Entries.GetFirstItem();
2034 while (item) {
2035 if ((*item)->m_Prefix.IsEmpty()) {
2036 // default namespace
2037 m_Serializer.Attribute(NULL, "xmlns", (*item)->m_Uri);
2038 } else {
2039 // namespace with prefix
2040 m_Serializer.Attribute("xmlns", (*item)->m_Prefix, (*item)->m_Uri);
2042 ++item;
2046 element->GetChildren().Apply(*this);
2047 m_Serializer.EndElement(prefix, tag);
2048 } else if (NPT_XmlTextNode* text = node->AsTextNode()) {
2049 m_Serializer.Text(text->GetString());
2053 private:
2054 // members
2055 NPT_XmlSerializer& m_Serializer;
2056 NPT_XmlAttributeWriter m_AttributeWriter;
2059 /*----------------------------------------------------------------------
2060 | NPT_XmlNodeCanonicalWriter
2061 +---------------------------------------------------------------------*/
2062 class NPT_XmlNodeCanonicalWriter
2064 public:
2065 // types
2066 struct MapChainLink {
2067 MapChainLink(MapChainLink* parent) : m_Parent(parent) {}
2068 MapChainLink* m_Parent;
2069 NPT_Map<NPT_String, NPT_String> m_RenderedNamespaces;
2072 // constructor
2073 NPT_XmlNodeCanonicalWriter(NPT_XmlSerializer& serializer,
2074 MapChainLink* map_chain = NULL) :
2075 m_MapChain(map_chain),
2076 m_Serializer(serializer) {
2077 m_Serializer.StartDocument();
2079 void operator()(NPT_XmlNode*& node) const;
2081 private:
2082 // types
2083 struct SortedAttributeList {
2084 // types
2085 struct Entry {
2086 const NPT_String* m_NamespaceUri;
2087 const NPT_XmlAttribute* m_Attribute;
2090 // methods
2091 void Add(const NPT_String* namespace_uri,
2092 const NPT_XmlAttribute* attribute);
2093 void Emit(NPT_XmlSerializer& serializer);
2095 // members
2096 NPT_List<Entry> m_Entries;
2099 struct SortedNamespaceList {
2100 // types
2101 struct Entry {
2102 const NPT_String* m_NamespacePrefix;
2103 const NPT_String* m_NamespaceUri;
2106 // methods
2107 void Add(const NPT_String* prefix, const NPT_String* uri);
2108 void Emit(NPT_XmlSerializer& serializer);
2110 // members
2111 NPT_List<Entry> m_Entries;
2114 // methods
2115 const NPT_String* GetNamespaceRenderedForPrefix(const NPT_String& prefix) const;
2117 // members
2118 MapChainLink* m_MapChain;
2119 NPT_XmlSerializer& m_Serializer;
2122 /*----------------------------------------------------------------------
2123 | NPT_XmlNodeCanonicalWriter::SortedAttributeList::Add
2124 +---------------------------------------------------------------------*/
2125 void
2126 NPT_XmlNodeCanonicalWriter::SortedAttributeList::Add(
2127 const NPT_String* namespace_uri,
2128 const NPT_XmlAttribute* attribute)
2130 // transform empty strings into NULL pointers
2131 if (namespace_uri && namespace_uri->IsEmpty()) namespace_uri = NULL;
2133 // find the namespace insertion position
2134 NPT_List<Entry>::Iterator entry = m_Entries.GetFirstItem();
2135 for (; entry; ++entry) {
2136 // decide if we insert now or move on
2137 const NPT_String* other_namespace_uri = entry->m_NamespaceUri;
2138 if (namespace_uri &&
2139 (other_namespace_uri == NULL || *namespace_uri > *other_namespace_uri)) {
2140 // this namespace uri is greater than the other, skip
2141 continue;
2142 } else if ((namespace_uri == NULL && other_namespace_uri == NULL) ||
2143 (namespace_uri && other_namespace_uri &&
2144 *namespace_uri == *other_namespace_uri)) {
2145 // namespace uris match, compare the names
2146 const NPT_XmlAttribute* other_attribute = entry->m_Attribute;
2147 if (attribute->GetName() > other_attribute->GetName()) continue;
2149 break;
2152 Entry new_entry = {namespace_uri, attribute};
2153 m_Entries.Insert(entry, new_entry);
2156 /*----------------------------------------------------------------------
2157 | NPT_XmlNodeCanonicalWriter::SortedAttributeList::Emit
2158 +---------------------------------------------------------------------*/
2159 void
2160 NPT_XmlNodeCanonicalWriter::SortedAttributeList::Emit(NPT_XmlSerializer& serializer)
2162 for (NPT_List<Entry>::Iterator i = m_Entries.GetFirstItem(); i; ++i) {
2163 serializer.Attribute(i->m_Attribute->GetPrefix(),
2164 i->m_Attribute->GetName(),
2165 i->m_Attribute->GetValue());
2169 /*----------------------------------------------------------------------
2170 | NPT_XmlNodeCanonicalWriter::SortedNamespaceList::Add
2171 +---------------------------------------------------------------------*/
2172 void
2173 NPT_XmlNodeCanonicalWriter::SortedNamespaceList::Add(const NPT_String* prefix,
2174 const NPT_String* uri)
2176 // find the namespace insertion position
2177 NPT_List<Entry>::Iterator entry = m_Entries.GetFirstItem();
2178 if (prefix && !prefix->IsEmpty()) {
2179 for (; entry; ++entry) {
2180 // decide if we insert now or move on
2181 if (entry->m_NamespacePrefix && *prefix <= *entry->m_NamespacePrefix) {
2182 break;
2185 } else {
2186 prefix = NULL;
2189 Entry new_entry = {prefix, uri};
2190 m_Entries.Insert(entry, new_entry);
2193 /*----------------------------------------------------------------------
2194 | NPT_XmlNodeCanonicalWriter::SortedNamespaceList::Emit
2195 +---------------------------------------------------------------------*/
2196 void
2197 NPT_XmlNodeCanonicalWriter::SortedNamespaceList::Emit(NPT_XmlSerializer& serializer)
2199 for (NPT_List<Entry>::Iterator i = m_Entries.GetFirstItem(); i; ++i) {
2200 const NPT_String* key = i->m_NamespacePrefix;
2201 const NPT_String* value = i->m_NamespaceUri;
2202 if (key == NULL) {
2203 serializer.Attribute(NULL, "xmlns", *value);
2204 } else if (*key != "xml" || *value != NPT_XmlNamespaceUri_Xml) {
2205 serializer.Attribute("xmlns", *key, *value);
2210 /*----------------------------------------------------------------------
2211 | NPT_XmlNodeCanonicalWriter::GetNamespaceRenderedForPrefix
2212 +---------------------------------------------------------------------*/
2213 const NPT_String*
2214 NPT_XmlNodeCanonicalWriter::GetNamespaceRenderedForPrefix(const NPT_String& prefix) const
2216 for (MapChainLink* link = m_MapChain;
2217 link;
2218 link = link->m_Parent) {
2219 NPT_String* uri;
2220 if (NPT_SUCCEEDED(link->m_RenderedNamespaces.Get(prefix, uri))) {
2221 return uri;
2225 return NULL;
2228 /*----------------------------------------------------------------------
2229 | NPT_XmlNodeCanonicalWriter::operator()
2230 +---------------------------------------------------------------------*/
2231 void
2232 NPT_XmlNodeCanonicalWriter::operator()(NPT_XmlNode*& node) const
2234 MapChainLink map_link(m_MapChain);
2236 if (NPT_XmlElementNode* element = node->AsElementNode()) {
2237 const NPT_String& prefix = element->GetPrefix();
2238 const NPT_String& tag = element->GetTag();
2240 // process namespaces
2241 const NPT_String* namespace_uri = element->GetNamespace();
2242 const NPT_String* rendered = GetNamespaceRenderedForPrefix(prefix);
2243 if (namespace_uri && namespace_uri->IsEmpty()) namespace_uri = NULL;
2244 if (prefix.IsEmpty()) {
2245 // default namespace
2246 if (rendered == NULL) {
2247 // default namespace not rendered
2248 if (namespace_uri) {
2249 map_link.m_RenderedNamespaces.Put("", *namespace_uri);
2251 } else {
2252 // default namespace already rendered
2253 const char* compare;
2254 if (namespace_uri) {
2255 compare = namespace_uri->GetChars();
2256 } else {
2257 compare = "";
2259 if (*rendered != compare) {
2260 // the rendered default namespace had a different uri
2261 map_link.m_RenderedNamespaces.Put("", compare);
2264 } else {
2265 // explicit namespace
2266 // NOTE: namespace_uri should not be an empty string, but we test just
2267 // in case the XML document is not compliant
2268 if (namespace_uri && (rendered == NULL || *rendered != *namespace_uri)) {
2269 // namespace prefix not rendered or rendered with a different value
2270 map_link.m_RenderedNamespaces.Put(prefix, *namespace_uri);
2274 // process attributes
2275 SortedAttributeList prefixed_attributes;
2276 SortedAttributeList naked_attributes;
2277 for (NPT_List<NPT_XmlAttribute*>::Iterator attribute = element->GetAttributes().GetFirstItem();
2278 attribute;
2279 ++attribute) {
2280 const NPT_String& a_prefix = (*attribute)->GetPrefix();
2281 if (a_prefix.IsEmpty()) {
2282 // naked attribute
2283 naked_attributes.Add(NULL, *attribute);
2284 } else {
2285 // decide if we need to render this namespace declaration
2286 namespace_uri = element->GetNamespaceUri(a_prefix);
2287 if (namespace_uri) {
2288 rendered = GetNamespaceRenderedForPrefix(a_prefix);;
2289 if (rendered == NULL || *rendered != *namespace_uri) {
2290 // namespace not rendered or rendered with a different value
2291 map_link.m_RenderedNamespaces.Put(a_prefix, *namespace_uri);
2293 prefixed_attributes.Add(namespace_uri, *attribute);
2298 // start of element
2299 m_Serializer.StartElement(prefix, tag);
2301 // namespace declarations
2302 if (map_link.m_RenderedNamespaces.GetEntryCount()) {
2303 SortedNamespaceList namespaces;
2304 NPT_List<NPT_Map<NPT_String, NPT_String>::Entry*>::Iterator entry =
2305 map_link.m_RenderedNamespaces.GetEntries().GetFirstItem();
2306 while (entry) {
2307 const NPT_String& key = (*entry)->GetKey();
2308 const NPT_String& value = (*entry)->GetValue();
2309 namespaces.Add(&key, &value);
2310 ++entry;
2312 namespaces.Emit(m_Serializer);
2315 // attributes
2316 naked_attributes.Emit(m_Serializer);
2317 prefixed_attributes.Emit(m_Serializer);
2319 // children
2320 MapChainLink* chain;
2321 if (map_link.m_RenderedNamespaces.GetEntryCount()) {
2322 chain = &map_link;
2323 } else {
2324 chain = m_MapChain;
2326 element->GetChildren().Apply(NPT_XmlNodeCanonicalWriter(m_Serializer, chain));
2328 // end of element
2329 m_Serializer.EndElement(prefix, tag);
2330 } else if (NPT_XmlTextNode* text = node->AsTextNode()) {
2331 m_Serializer.Text(text->GetString());
2335 /*----------------------------------------------------------------------
2336 | NPT_XmlSerializer::NPT_XmlSerializer
2337 +---------------------------------------------------------------------*/
2338 NPT_XmlSerializer::NPT_XmlSerializer(NPT_OutputStream* output,
2339 NPT_Cardinal indentation,
2340 bool shrink_empty_elements,
2341 bool add_xml_decl) :
2342 m_Output(output),
2343 m_ElementPending(false),
2344 m_Depth(0),
2345 m_Indentation(indentation),
2346 m_ElementHasText(false),
2347 m_ShrinkEmptyElements(shrink_empty_elements),
2348 m_AddXmlDecl(add_xml_decl)
2352 /*----------------------------------------------------------------------
2353 | NPT_XmlSerializer::~NPT_XmlSerializer
2354 +---------------------------------------------------------------------*/
2355 NPT_XmlSerializer::~NPT_XmlSerializer()
2359 /*----------------------------------------------------------------------
2360 | NPT_XmlSerializer::StartDocument
2361 +---------------------------------------------------------------------*/
2362 NPT_Result
2363 NPT_XmlSerializer::StartDocument()
2365 if (!m_AddXmlDecl) return NPT_SUCCESS;
2367 return m_Output->WriteString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n");
2370 /*----------------------------------------------------------------------
2371 | NPT_XmlSerializer::EndDocument
2372 +---------------------------------------------------------------------*/
2373 NPT_Result
2374 NPT_XmlSerializer::EndDocument()
2376 return m_ElementPending?NPT_ERROR_INVALID_STATE:NPT_SUCCESS;
2379 /*----------------------------------------------------------------------
2380 | NPT_XmlSerializer::EscapeChar
2381 +---------------------------------------------------------------------*/
2382 void
2383 NPT_XmlSerializer::EscapeChar(unsigned char c, char* text)
2385 *text++ = '&';
2386 *text++ = '#';
2387 *text++ = 'x';
2388 int c0 = c>>4;
2389 int c1 = c&0xF;
2390 if (c0) {
2391 *text++ = c0 >= 10 ? 'A'+(c0-10) : '0'+c0;
2393 *text++ = c1 >= 10 ? 'A'+(c1-10) : '0'+c1;
2394 *text++ = ';';
2395 *text = '\0';
2398 /*----------------------------------------------------------------------
2399 | NPT_XmlSerializer::ProcessPending
2400 +---------------------------------------------------------------------*/
2401 NPT_Result
2402 NPT_XmlSerializer::ProcessPending()
2404 if (!m_ElementPending) return NPT_SUCCESS;
2405 m_ElementPending = false;
2406 return m_Output->Write(">", 1);
2409 /*----------------------------------------------------------------------
2410 | NPT_XmlSerializer::OutputEscapedString
2411 +---------------------------------------------------------------------*/
2412 NPT_Result
2413 NPT_XmlSerializer::OutputEscapedString(const char* text, bool attribute)
2415 const char* start = text;
2416 char escaped[7];
2417 while (char c = *text) {
2418 const char* insert = NULL;
2419 switch (c) {
2420 case '\r': {
2421 EscapeChar(c, escaped);
2422 insert = escaped;
2423 break;
2425 case '\n':
2426 case '\t':
2427 if (attribute) {
2428 EscapeChar(c, escaped);
2429 insert = escaped;
2431 break;
2433 case '&' : insert = "&amp;"; break;
2434 case '<' : insert = "&lt;"; break;
2435 case '>' : if (!attribute) insert = "&gt;"; break;
2436 case '"' : if (attribute) insert = "&quot;"; break;
2437 default :
2438 break;
2440 if (insert) {
2441 // output pending chars
2442 if (start != text) m_Output->WriteFully(start, (NPT_Size)(text-start));
2443 m_Output->WriteString(insert);
2444 start = ++text;
2445 } else {
2446 ++text;
2449 if (start != text) {
2450 m_Output->WriteFully(start, (NPT_Size)(text-start));
2453 return NPT_SUCCESS;
2456 /*----------------------------------------------------------------------
2457 | NPT_XmlSerializer::OutputIndentation
2458 +---------------------------------------------------------------------*/
2459 void
2460 NPT_XmlSerializer::OutputIndentation(bool start)
2462 if (m_Depth || !start) m_Output->Write("\r\n", 2);
2464 // ensure we have enough chars in the prefix string
2465 unsigned int prefix_length = m_Indentation*m_Depth;
2466 if (m_IndentationPrefix.GetLength() < prefix_length) {
2467 unsigned int needed = prefix_length-m_IndentationPrefix.GetLength();
2468 for (unsigned int i=0; i<needed; i+=16) {
2469 m_IndentationPrefix.Append(" ", 16);
2473 // print the indentation prefix
2474 m_Output->WriteFully(m_IndentationPrefix.GetChars(), prefix_length);
2477 /*----------------------------------------------------------------------
2478 | NPT_XmlSerializer::StartElement
2479 +---------------------------------------------------------------------*/
2480 NPT_Result
2481 NPT_XmlSerializer::StartElement(const char* prefix, const char* name)
2483 ProcessPending();
2484 if (m_Indentation) OutputIndentation(true);
2485 m_ElementPending = true;
2486 m_ElementHasText = false;
2487 m_Depth++;
2488 m_Output->Write("<", 1);
2489 if (prefix && prefix[0]) {
2490 m_Output->WriteString(prefix);
2491 m_Output->Write(":", 1);
2493 return m_Output->WriteString(name);
2496 /*----------------------------------------------------------------------
2497 | NPT_XmlSerializer::EndElement
2498 +---------------------------------------------------------------------*/
2499 NPT_Result
2500 NPT_XmlSerializer::EndElement(const char* prefix, const char* name)
2502 m_Depth--;
2504 if (m_ElementPending) {
2505 // this element has no children
2506 m_ElementPending = false;
2507 if (m_ShrinkEmptyElements) {
2508 return m_Output->WriteFully("/>", 2);
2509 } else {
2510 m_Output->Write(">",1);
2514 if (m_Indentation && !m_ElementHasText) OutputIndentation(false);
2515 m_ElementHasText = false;
2516 m_Output->WriteFully("</", 2);
2517 if (prefix && prefix[0]) {
2518 m_Output->WriteString(prefix);
2519 m_Output->Write(":", 1);
2521 m_Output->WriteString(name);
2522 return m_Output->Write(">", 1);
2525 /*----------------------------------------------------------------------
2526 | NPT_XmlSerializer::Attribute
2527 +---------------------------------------------------------------------*/
2528 NPT_Result
2529 NPT_XmlSerializer::Attribute(const char* prefix, const char* name, const char* value)
2531 m_Output->Write(" ", 1);
2532 if (prefix && prefix[0]) {
2533 m_Output->WriteString(prefix);
2534 m_Output->Write(":", 1);
2536 m_Output->WriteString(name);
2537 m_Output->WriteFully("=\"", 2);
2538 OutputEscapedString(value, true);
2539 return m_Output->Write("\"", 1);
2542 /*----------------------------------------------------------------------
2543 | NPT_XmlSerializer::Text
2544 +---------------------------------------------------------------------*/
2545 NPT_Result
2546 NPT_XmlSerializer::Text(const char* text)
2548 ProcessPending();
2549 m_ElementHasText = true;
2550 return OutputEscapedString(text, false);
2553 /*----------------------------------------------------------------------
2554 | NPT_XmlSerializer::CdataSection
2555 +---------------------------------------------------------------------*/
2556 NPT_Result
2557 NPT_XmlSerializer::CdataSection(const char* data)
2559 ProcessPending();
2560 m_ElementHasText = true;
2561 m_Output->WriteFully("<![CDATA[", 9);
2562 m_Output->WriteString(data);
2563 return m_Output->WriteFully("]]>", 3);
2566 /*----------------------------------------------------------------------
2567 | NPT_XmlSerializer::Comment
2568 +---------------------------------------------------------------------*/
2569 NPT_Result
2570 NPT_XmlSerializer::Comment(const char* comment)
2572 ProcessPending();
2573 m_Output->WriteFully("<!--", 4);
2574 m_Output->WriteString(comment);
2575 return m_Output->WriteFully("-->", 3);
2578 /*----------------------------------------------------------------------
2579 | NPT_XmlWriter::Serialize
2580 +---------------------------------------------------------------------*/
2581 NPT_Result
2582 NPT_XmlWriter::Serialize(NPT_XmlNode& node,
2583 NPT_OutputStream& output,
2584 bool add_xml_decl)
2586 NPT_XmlSerializer serializer(&output, m_Indentation, true, add_xml_decl);
2587 NPT_XmlNodeWriter node_writer(serializer);
2588 NPT_XmlNode* node_pointer = &node;
2589 node_writer(node_pointer);
2591 return NPT_SUCCESS;
2594 /*----------------------------------------------------------------------
2595 | NPT_XmlCanonicalizer::Serialize
2596 +---------------------------------------------------------------------*/
2597 NPT_Result
2598 NPT_XmlCanonicalizer::Serialize(NPT_XmlNode& node,
2599 NPT_OutputStream& output,
2600 bool add_xml_decl)
2602 // create a serializer with no indentation and no shrinking of empty elements
2603 NPT_XmlSerializer serializer(&output, 0, false, add_xml_decl);
2605 // serialize the node
2606 NPT_XmlNodeCanonicalWriter node_writer(serializer);
2607 NPT_XmlNode* node_pointer = &node;
2608 node_writer(node_pointer);
2610 return NPT_SUCCESS;