1 // ----------------------------------------------------------------------------
2 // Copyright (C) 2006-2007 Marcin Kalicinski
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
8 // For more information, see www.boost.org
9 // This file is derived from RapidXml project, see http://rapidxml.sourceforge.net
10 // ----------------------------------------------------------------------------
11 #ifndef RAPIDXML_HPP_INCLUDED
12 #define RAPIDXML_HPP_INCLUDED
14 // Revision $DateTime: 2007/03/22 21:25:05 $
15 //! \file rapidxml.hpp This file contains rapidxml parser and DOM implementation
17 #include <cstdlib> // For std::size_t
18 #include <cassert> // For assert
19 #include <new> // For placement new
21 // On MSVC, disable "conditional expression is constant" warning (level 4).
22 // This warning is almost impossible to avoid with certain types of templated code
25 #pragma warning(disable:4127) // Conditional expression is constant
28 ///////////////////////////////////////////////////////////////////////////
29 // RAPIDXML_PARSE_ERROR
31 #if defined(RAPIDXML_NO_EXCEPTIONS)
33 #define RAPIDXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); }
//! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS,
//! this function is called to notify user about the error.
//! It must be defined by the user.
//! <br><br>
//! This function cannot return. If it does, the results are undefined.
//! <br><br>
//! A very simple definition might look like that:
//! <pre>
//! void %rapidxml::%parse_error_handler(const char *what, void *where)
//! {
//!     std::cout << "Parse error: " << what << "\n";
//!     std::abort();
//! }
//! </pre>
//! \param what Human readable description of the error.
//! \param where Pointer to character data where error was detected.
void parse_error_handler(const char *what, void *where);
58 #include <exception> // For std::exception
60 #define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where)
//! Parse error exception.
//! This exception is thrown by the parser when an error occurs.
//! Use what() function to get human-readable error message.
//! Use where() function to get a pointer to position within source text where error was detected.
//! <br><br>
//! If throwing exceptions by the parser is undesirable,
//! it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included.
//! This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception.
//! This function must be defined by the user.
//! <br><br>
//! This class derives from <code>std::exception</code> class.
class parse_error: public std::exception
{

public:

    //! Constructs parse error
    //! \param what Human readable description of the error. Only the pointer is stored, the text is not copied.
    //! \param where Pointer to character data where error was detected.
    parse_error(const char *what, void *where)
        : m_what(what)
        , m_where(where)
    {
    }

    //! Gets human readable description of error.
    //! \return Pointer to null terminated description of the error.
    virtual const char *what() const throw()
    {
        return m_what;
    }

    //! Gets pointer to character data where error happened.
    //! Ch should be the same as char type of xml_document that produced the error.
    //! \return Pointer to location within the parsed string where error occurred.
    template<class Ch>
    Ch *where() const
    {
        return reinterpret_cast<Ch *>(m_where);
    }

private:

    const char *m_what;     // Description passed to the constructor (not owned)
    void *m_where;          // Untyped position passed to the constructor

};
// Forward declarations of the node and attribute class templates,
// so that they can be referred to before their definitions
template<class Ch> class xml_node;
template<class Ch> class xml_attribute;
//! Enumeration listing all node types produced by the parser.
//! Use xml_node::type() function to query node type.
enum node_type
{
    node_document,      //!< A document node. Name and value are empty.
    node_element,       //!< An element node. Name contains element name. Value contains text of first data node.
    node_data,          //!< A data node. Name is empty. Value contains data text.
    node_cdata,         //!< A CDATA node. Name is empty. Value contains data text.
    node_comment,       //!< A comment node. Name is empty. Value contains comment text.
    node_declaration,   //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes.
    node_doctype,       //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text.
    node_pi             //!< A PI node. Name contains target. Value contains instructions.
};
135 ///////////////////////////////////////////////////////////////////////
//! Parse flag instructing the parser to not create data nodes.
//! Text of first data node will still be placed in value of parent element,
//! unless parse_no_element_values flag is also specified.
//! Can be combined with other flags by use of | operator.
const int parse_no_data_nodes = 0x1;

//! Parse flag instructing the parser to not use text of first data node as a value of parent element.
//! Can be combined with other flags by use of | operator.
const int parse_no_element_values = 0x2;

//! Parse flag instructing the parser to not place zero terminators after strings in the source text.
//! By default zero terminators are placed, modifying source text.
//! Can be combined with other flags by use of | operator.
const int parse_no_string_terminators = 0x4;

//! Parse flag instructing the parser to not translate entities in the source text.
//! By default entities are translated, modifying source text.
//! Can be combined with other flags by use of | operator.
const int parse_no_entity_translation = 0x8;

//! Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters.
//! By default, UTF-8 handling is enabled.
//! Can be combined with other flags by use of | operator.
const int parse_no_utf8 = 0x10;

//! Parse flag instructing the parser to create XML declaration node.
//! By default, declaration node is not created.
//! Can be combined with other flags by use of | operator.
const int parse_declaration_node = 0x20;

//! Parse flag instructing the parser to create comments nodes.
//! By default, comment nodes are not created.
//! Can be combined with other flags by use of | operator.
const int parse_comment_nodes = 0x40;

//! Parse flag instructing the parser to create DOCTYPE node.
//! By default, doctype node is not created.
//! Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one.
//! Can be combined with other flags by use of | operator.
const int parse_doctype_node = 0x80;

//! Parse flag instructing the parser to create PI nodes.
//! By default, PI nodes are not created.
//! Can be combined with other flags by use of | operator.
const int parse_pi_nodes = 0x100;

//! Parse flag instructing the parser to validate closing tag names.
//! If not set, name inside closing tag is irrelevant to the parser.
//! By default, closing tags are not validated.
//! Can be combined with other flags by use of | operator.
const int parse_validate_closing_tags = 0x200;

//! Parse flag instructing the parser to trim leading and trailing whitespace of text,
//! and condense all interior whitespace runs to a single space character.
//! By default, whitespace is not normalized.
//! If this flag is specified, source text will be modified.
//! Can be combined with other flags by use of | operator.
const int parse_normalize_whitespace = 0x400;

// Compound flags

//! Parse flags which represent default behaviour of the parser.
//! This is always equal to 0, so that all other flags can be simply ored together.
//! Normally there is no need to inconveniently disable flags by anding with their negated (~) values.
//! This also means that meaning of each flag is a <i>negation</i> of the default setting.
//! For example, if flag reads <code>parse_no_utf8</code>, it means that utf-8 is <i>enabled</i> by default,
//! and using the flag will disable it.
const int parse_default = 0;

//! A combination of parse flags that forbids any modifications of the source text.
//! This also results in faster parsing. However, note that the following will occur:
//! <ul>
//! <li>names and values of nodes will not be zero terminated, you have to use xml_base::name_size() and xml_base::value_size() functions to determine where name and value ends</li>
//! <li>entities will not be translated</li>
//! <li>whitespace will not be normalized</li>
//! </ul>
const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation;

//! A combination of parse flags resulting in fastest possible parsing without sacrificing important data.
const int parse_fastest = parse_non_destructive | parse_no_data_nodes;

//! A combination of parse flags resulting in largest amount of data being extracted.
//! This usually results in slowest parsing.
const int parse_full = parse_declaration_node
                     | parse_comment_nodes
                     | parse_doctype_node
                     | parse_pi_nodes
                     | parse_validate_closing_tags
                     | parse_normalize_whitespace;
223 ///////////////////////////////////////////////////////////////////////
230 // Struct that contains lookup tables for the parser
231 // It must be a template to allow correct linking (because it has static data members, which are defined in a header file).
// NOTE(review): the struct header (its name and template parameter list) is not present in this listing;
// only the static member declarations below are visible. Confirm against the original header before editing.
// Every table is declared with 256 entries of unsigned char - presumably one entry per 8-bit character code (TODO confirm).
// The table contents are defined elsewhere; what distinguishes the "_pure", "_no_ws" and "_with_ws" variants is not visible here.
235 static const unsigned char lookup_whitespace
[256]; // Whitespace table
236 static const unsigned char lookup_node_name
[256]; // Node name table
237 static const unsigned char lookup_text
[256]; // Text table
238 static const unsigned char lookup_text_pure_no_ws
[256]; // Text table
239 static const unsigned char lookup_text_pure_with_ws
[256]; // Text table
240 static const unsigned char lookup_attribute_name
[256]; // Attribute name table
241 static const unsigned char lookup_attribute_data_1
[256]; // Attribute data table with single quote
242 static const unsigned char lookup_attribute_data_1_pure
[256]; // Attribute data table with single quote
243 static const unsigned char lookup_attribute_data_2
[256]; // Attribute data table with double quotes
244 static const unsigned char lookup_attribute_data_2_pure
[256]; // Attribute data table with double quotes
245 static const unsigned char lookup_digits
[256]; // Digits
// Find length of the string.
// p must point to a zero-terminated string; the terminator is not counted.
template<class Ch>
inline std::size_t measure(const Ch *p)
{
    const Ch *tmp = p;
    while (*tmp)
        ++tmp;
    return static_cast<std::size_t>(tmp - p);
}
// Compare strings for equality.
// Both strings are given as pointer + length, so neither has to be zero terminated.
// Returns true only if the lengths match and all characters are equal.
template<class Ch>
inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, std::size_t size2)
{
    // Strings of different length can never be equal
    if (size1 != size2)
        return false;
    for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2)
        if (*p1 != *p2)
            return false;
    return true;
}
273 ///////////////////////////////////////////////////////////////////////
276 //! This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation.
277 //! In most cases, you will not need to use this class directly.
278 //! However, if you need to create nodes manually or modify names/values of nodes,
279 //! you are encouraged to use memory_pool of relevant xml_document to allocate the memory.
280 //! Not only is this faster than allocating them by using <code>new</code> operator,
281 //! but also their lifetime will be tied to the lifetime of document,
282 //! possibly simplyfing memory management.
284 //! Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool.
285 //! You can also call allocate_string() function to allocate strings.
286 //! Such strings can then be used as names or values of nodes without worrying about their lifetime.
287 //! Note that there is no <code>free()</code> function -- all allocations are freed at once when clear() function is called,
288 //! or when the pool is destroyed.
290 //! It is also possible to create a standalone memory_pool, and use it
291 //! to allocate nodes, whose lifetime will not be tied to any document.
293 //! Pool maintains <code>StaticBlockSize</code> bytes of statically allocated memory.
294 //! Until this memory is exhausted, no dynamic memory allocations are performed.
295 //! When static memory is exhausted, pool allocates additional chunks of memory,
296 //! by using <code>new</code> and <code>delete</code> operators.
297 //! This behaviour can be changed by suppyling custom allocation routines.
298 //! Use set_allocator() function to set them.
299 //! \param Ch Character type of created nodes.
300 //! \param StaticBlockSize Size of static memory block owned by the pool, in bytes. Pool is guaranteed not to make dynamic memory allocations until this block is exhausted. Using too large static block will cause stack overflow if xml_document or memory_pool is allocated on the stack.
301 template<class Ch
, int StaticBlockSize
>
308 typedef void *(alloc_func
)(std::size_t); // Type of user-defined function used to allocate memory
309 typedef void (free_func
)(void *); // Type of user-defined function used to free memory
312 //! Constructs empty pool with default allocator functions.
314 : m_block(&m_static_block
)
321 //! Destroys pool and frees all the memory.
322 //! This causes memory occupied by nodes allocated by the pool to be freed.
323 //! Nodes allocated from the pool are no longer valid.
329 //! Allocates a new node from the pool.
330 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
331 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
332 //! will call parse_error_handler() function.
333 //! \param type Type of node to allocate.
334 //! \return Pointer to allocated node. This pointer will never be NULL.
335 xml_node
<Ch
> *allocate_node(node_type type
)
337 void *memory
= allocate_memory(sizeof(xml_node
<Ch
>));
338 xml_node
<Ch
> *node
= new(memory
) xml_node
<Ch
>(type
);
342 //! Allocates a new element node from the pool and assigns name and value to it.
343 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
344 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
345 //! will call parse_error_handler() function.
346 //! \param name Name to assign to the element, or 0 to assign no name.
347 //! \param value Value to assign to the element, or 0 to assign no value.
348 //! \param name_size Size of name to assign, or 0 to automatically calculate from name string.
349 //! \param value_size Size of value to assign, or 0 to automatically calculate from value string.
350 //! \return Pointer to allocated element. This pointer will never be NULL.
351 xml_node
<Ch
> *allocate_element(Ch
*name
, Ch
*value
= 0,
352 std::size_t name_size
= 0, std::size_t value_size
= 0)
354 xml_node
<Ch
> *element
= allocate_node(node_element
);
358 element
->name(name
, name_size
);
365 element
->value(value
, value_size
);
367 element
->value(value
);
372 //! Allocates a new data node from the pool and assigns a value to it.
373 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
374 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
375 //! will call parse_error_handler() function.
376 //! \param value Value to assign to the element, or 0 to assign no value.
377 //! \param value_size Size of value to assign, or 0 to automatically calculate from value string.
378 //! \return Pointer to allocated element. This pointer will never be NULL.
379 xml_node
<Ch
> *allocate_data(Ch
*value
= 0, std::size_t value_size
= 0)
381 xml_node
<Ch
> *data
= allocate_node(node_data
);
385 data
->value(value
, value_size
);
392 //! Allocates a new attribute from the pool and assigns name and value to it.
393 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
394 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
395 //! will call parse_error_handler() function.
396 //! \return Pointer to allocated attribute. This pointer will never be NULL.
397 xml_attribute
<Ch
> *allocate_attribute()
399 void *memory
= allocate_memory(sizeof(xml_attribute
<Ch
>));
400 xml_attribute
<Ch
> *attribute
= new(memory
) xml_attribute
<Ch
>;
404 //! Allocates a new attribute from the pool and assigns name and value to it.
405 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
406 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
407 //! will call parse_error_handler() function.
408 //! \param name Name to assign to the attribute, or 0 to assign no name.
409 //! \param value Value to assign to the attribute, or 0 to assign no value.
410 //! \param name_size Size of name to assign, or 0 to automatically calculate from name string.
411 //! \param value_size Size of value to assign, or 0 to automatically calculate from value string.
412 //! \return Pointer to allocated attribute. This pointer will never be NULL.
413 xml_attribute
<Ch
> *allocate_attribute(Ch
*name
, Ch
*value
= 0,
414 std::size_t name_size
= 0, std::size_t value_size
= 0)
416 xml_attribute
<Ch
> *attribute
= allocate_attribute();
420 attribute
->name(name
, name_size
);
422 attribute
->name(name
);
427 attribute
->value(value
, value_size
);
429 attribute
->value(value
);
434 //! Allocates a char array of given size from the pool.
435 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
436 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
437 //! will call parse_error_handler() function.
438 //! \param size Number of characters to allocate.
439 //! \return Pointer to allocated char array. This pointer will never be NULL.
440 Ch
*allocate_string(std::size_t size
)
443 void *memory
= allocate_memory(size
);
444 return static_cast<Ch
*>(memory
);
447 //! Allocates a char array of appropriate size from the pool,
448 //! and copies given string to it.
449 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>.
450 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function
451 //! will call parse_error_handler() function.
452 //! \param source String to initialize the allocated memory with.
453 //! \param size Number of characters to allocate, or zero to calculate it automatically from string length.
454 //! \return Pointer to allocated char array. This pointer will never be NULL.
455 Ch
*allocate_string(const Ch
*source
, std::size_t size
= 0)
459 size
= internal::measure(source
) + 1;
460 Ch
*result
= allocate_string(size
);
461 for (std::size_t i
= 0; i
< size
; ++i
)
462 result
[i
] = source
[i
];
467 //! This causes memory occupied by nodes allocated by the pool will be freed.
468 //! Nodes allocated from the pool will no longer be valid.
471 while (m_block
!= &m_static_block
)
473 block
*tmp
= m_block
->previous_block
;
475 m_free_func(m_block
);
480 m_block
->pointer
= m_block
->data
; // Restore static block pointer
483 //! Sets or resets the user-defined memory allocation functions for the pool.
484 //! This can only be called when no memory is allocated from the pool yet, otherwise results are undefined.
485 //! Allocation function must not return invalid pointer on failure. It should either throw,
486 //! stop the program, or use <code>longjmp()</code> function to pass control to other place of program.
487 //! If it returns invalid pointer, results are undefined.
489 //! User defined allocation functions must have the following forms:
491 //! <br>void *allocate(std::size_t size);
492 //! <br>void free(void *pointer);
494 //! \param af Allocation function, or 0 to restore default function
495 //! \param ff Free function, or 0 to restore default function
496 void set_allocator(alloc_func
*af
, free_func
*ff
)
498 assert(m_block
== &m_static_block
&& m_block
->pointer
== m_block
->data
); // Verify that no memory is allocated
508 block(block
*previous_block
)
509 : previous_block(previous_block
)
513 block
*previous_block
; // Pointer to previous block in list (used during deallocation)
514 char *pointer
; // Pointer to first free byte in block
515 char data
[StaticBlockSize
]; // Memory
518 // Allocates memory from block
519 void *allocate_memory(std::size_t size
)
521 if (size
> StaticBlockSize
)
522 RAPIDXML_PARSE_ERROR("out of memory", 0);
523 if (m_block
->pointer
- m_block
->data
+ size
> StaticBlockSize
) // If current block exhausted, allocate a new block
527 void *memory
= m_alloc_func(sizeof(block
));
528 assert(memory
); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp
529 m_block
= new(memory
) block(m_block
);
533 m_block
= new block(m_block
);
534 #ifdef RAPIDXML_NO_EXCEPTIONS
535 // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc
538 RAPIDXML_PARSE_ERROR("out of memory", 0);
543 char *result
= m_block
->pointer
;
544 m_block
->pointer
+= size
; // Advance pointer to after current allocation
548 block
*m_block
; // Current block
549 block m_static_block
; // Static block
550 alloc_func
*m_alloc_func
; // Allocator function, or 0 if default is to be used
551 free_func
*m_free_func
; // Free function, or 0 if default is to be used
555 ///////////////////////////////////////////////////////////////////////////
558 //! Base class for xml_node and xml_attribute implementing common functions:
559 //! name(), name_size(), value(), value_size() and parent().
560 //! \param Ch Character type to use
567 ///////////////////////////////////////////////////////////////////////////
568 // Construction & destruction
570 // Construct a base with empty name, value and parent
578 ///////////////////////////////////////////////////////////////////////////
581 //! Gets name of the node.
582 //! Interpretation of name depends on type of node.
583 //! Note that name will not be zero-terminated if parse_no_string_terminators option was selected during parse.
585 //! Use name_size() function to determine length of the name.
586 //! \return Name of node, or empty string if node has no name.
589 return m_name
? m_name
: nullstr();
592 //! Gets size of node name, not including terminator character.
593 //! This function works correctly irrespective of whether name is or is not zero terminated.
594 //! \return Size of node name, in characters.
595 std::size_t name_size() const
597 return m_name
? m_name_size
: 0;
600 //! Gets value of node.
601 //! Interpretation of value depends on type of node.
602 //! Note that value will not be zero-terminated if parse_no_string_terminators option was selected during parse.
604 //! Use value_size() function to determine length of the value.
605 //! \return Value of node, or empty string if node has no value.
608 return m_value
? m_value
: nullstr();
611 //! Gets size of node value, not including terminator character.
612 //! This function works correctly irrespective of whether value is or is not zero terminated.
613 //! \return Size of node value, in characters.
614 std::size_t value_size() const
616 return m_value
? m_value_size
: 0;
619 ///////////////////////////////////////////////////////////////////////////
622 //! Sets name of node to a non zero-terminated string.
623 //! See <a href="lifetimes">lifetimes of names and values</a>.
625 //! Note that node does not own its name or value, it only stores a pointer to it.
626 //! It will not delete or otherwise free the pointer on destruction.
627 //! It is reponsibility of the user to properly manage lifetime of the string.
628 //! The easiest way to achieve it is to use memory_pool of the document to allocate the string -
629 //! on destruction of the document the string will be automatically freed.
631 //! Size of name must be specified separately, because it does not have to be zero terminated.
632 //! Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated).
633 //! \param name Name of node to set. Does not have to be zero terminated.
634 //! \param size Size of name, in characters. This does not include zero terminator, if one is present.
635 void name(const Ch
*name
, std::size_t size
)
637 m_name
= const_cast<Ch
*>(name
);
641 //! Sets name of node to a zero-terminated string.
642 //! See <a href="lifetimes">lifetimes of names and values</a>.
643 //! \param name Name of node to set. Must be zero terminated.
644 void name(const Ch
*name
)
646 this->name(name
, internal::measure(name
));
649 //! Sets value of node to a non zero-terminated string.
650 //! See <a href="lifetimes">lifetimes of values and values</a>.
652 //! Note that node does not own its name or value, it only stores a pointer to it.
653 //! It will not delete or otherwise free the pointer on destruction.
654 //! It is reponsibility of the user to properly manage lifetime of the string.
655 //! The easiest way to achieve it is to use memory_pool of the document to allocate the string -
656 //! on destruction of the document the string will be automatically freed.
658 //! Size of value must be specified separately, because it does not have to be zero terminated.
659 //! Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated).
660 //! \param value value of node to set. Does not have to be zero terminated.
661 //! \param size Size of value, in characters. This does not include zero terminator, if one is present.
662 void value(const Ch
*value
, std::size_t size
)
664 m_value
= const_cast<Ch
*>(value
);
668 //! Sets value of node to a zero-terminated string.
669 //! See <a href="lifetimes">lifetimes of names and values</a>.
670 //! \param value Vame of node to set. Must be zero terminated.
671 void value(const Ch
*value
)
673 this->value(value
, internal::measure(value
));
676 ///////////////////////////////////////////////////////////////////////////
677 // Related nodes access
679 //! Gets node parent.
680 //! \return Pointer to parent node, or 0 if node has no parent.
681 xml_node
<Ch
> *parent() const
688 // Return empty string
691 static Ch zero
= Ch('\0');
695 Ch
*m_name
; // Name of node, or 0 if no name
696 Ch
*m_value
; // Value of node, or 0 if no value
697 std::size_t m_name_size
; // Length of node name, or undefined of no name
698 std::size_t m_value_size
; // Length of node value, or undefined if no value
699 xml_node
<Ch
> *m_parent
; // Pointer to parent node, or 0 if none
703 //! Class representing attribute node of XML document.
704 //! Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base).
705 //! Note that after parse, both name and value of attribute will point to interior of source text used for parsing.
706 //! Thus, this text must persist in memory for the lifetime of attribute.
707 //! \param Ch Character type to use.
709 class xml_attribute
: public xml_base
<Ch
>
712 friend class xml_node
<Ch
>;
716 ///////////////////////////////////////////////////////////////////////////
717 // Construction & destruction
719 //! Constructs an empty attribute with the specified type.
720 //! Consider using memory_pool of appropriate xml_document if allocating attributes manually.
725 ///////////////////////////////////////////////////////////////////////////
726 // Related nodes access
728 //! Gets previous attribute.
729 //! \return Pointer to previous sibling of attribute, or 0 if attribute has no previous sibling.
730 xml_attribute
<Ch
> *previous_attribute() const
732 return this->m_parent
&& m_prev_attribute
? m_prev_attribute
: 0;
735 //! Finds previous attribute with given name.
736 //! \param name Name of attribute to find, must be zero-terminated
737 //! \return Pointer to found attribute, or 0 if not found.
738 xml_attribute
<Ch
> *previous_attribute(const Ch
*name
) const
741 return previous_attribute(name
, internal::measure(name
));
744 //! Finds previous attribute with given name.
745 //! \param name Name of attribute to find, doesn't have to be zero-terminated
746 //! \param name_size Size of name, in characters
747 //! \return Pointer to found attribute, or 0 if not found.
748 xml_attribute
<Ch
> *previous_attribute(const Ch
*name
, std::size_t name_size
) const
751 for (xml_attribute
<Ch
> *attribute
= previous_attribute(); attribute
; attribute
= attribute
->previous_attribute())
752 if (internal::compare(attribute
->name(), attribute
->name_size(), name
, name_size
))
757 //! Gets next attribute.
758 //! \return Pointer to next sibling of attribute, or 0 if attribute has no next sibling.
759 xml_attribute
<Ch
> *next_attribute() const
761 return this->m_parent
? m_next_attribute
: 0;
764 //! Finds next attribute with given name.
765 //! \param name Name of attribute to find, must be zero-terminated
766 //! \return Pointer to found attribute, or 0 if not found.
767 xml_attribute
<Ch
> *next_attribute(const Ch
*name
) const
770 return next_attribute(name
, internal::measure(name
));
773 //! Finds next attribute with given name.
774 //! \param name Name of attribute to find, doesn't have to be zero-terminated
775 //! \param name_size Size of name, in characters
776 //! \return Pointer to found attribute, or 0 if not found.
777 xml_attribute
<Ch
> *next_attribute(const Ch
*name
, std::size_t name_size
) const
780 for (xml_attribute
<Ch
> *attribute
= next_attribute(); attribute
; attribute
= attribute
->next_attribute())
781 if (internal::compare(attribute
->name(), attribute
->name_size(), name
, name_size
))
788 xml_attribute
<Ch
> *m_prev_attribute
; // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero
789 xml_attribute
<Ch
> *m_next_attribute
; // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero
793 ///////////////////////////////////////////////////////////////////////////
//! Class representing a node of XML document.
//! Each node may have associated name and value strings, which are available through name() and value() functions.
//! Interpretation of name and value depends on type of the node.
//! Type of node can be determined by using type() function.
//!
//! Note that after parse, both name and value of node, if any, will point to the interior of source text used for parsing.
//! Thus, this text must persist in memory for the lifetime of node.
//! \param Ch Character type to use.
805 class xml_node
: public xml_base
<Ch
>
810 ///////////////////////////////////////////////////////////////////////////
811 // Construction & destruction
813 //! Constructs an empty node with the specified type.
814 //! Consider using memory_pool of appropriate document to allocate nodes manually.
815 //! \param type Type of node to construct.
// NOTE(review): only the signature and the m_first_attribute(0) initializer are present in this listing;
// the other member initializers and the constructor body are not visible here - confirm against the original header.
816 xml_node(node_type type
)
819 , m_first_attribute(0)
823 ///////////////////////////////////////////////////////////////////////////
826 //! Gets type of node.
827 //! \return Type of node.
// NOTE(review): the body of this accessor is not present in this listing; presumably it returns the
// node_type passed to the constructor - confirm against the original header.
828 node_type
type() const
833 ///////////////////////////////////////////////////////////////////////////
834 // Related nodes access
836 //! Gets first child of node
837 //! \return Pointer to first child of node, or 0 if node has no children.
838 xml_node
<Ch
> *first_child() const
840 return m_first_child
;
843 //! Finds first child of node with given name
844 //! \param name Name of child to find, must be zero-terminated.
845 //! \return Pointer to found child of node, or 0 if not found.
846 xml_node
<Ch
> *first_child(const Ch
*name
) const
849 return first_child(name
, internal::measure(name
));
852 //! Finds first child of node with given name
853 //! \param name Name of child to find, doesn't have to be zero-terminated.
854 //! \param name_size Size of name, in characters.
855 //! \return Pointer to found child of node, or 0 if not found.
856 xml_node
<Ch
> *first_child(const Ch
*name
, std::size_t name_size
) const
859 for (xml_node
<Ch
> *child
= first_child(); child
; child
= child
->next_sibling())
860 if (internal::compare(child
->name(), child
->name_size(), name
, name_size
))
865 //! Gets last child of node. Behaviour is undefined if node has no children.
866 //! Use first_child() to test if node has children.
867 //! \return Pointer to last child of node.
// NOTE(review): only the assertion survives in this listing; the return statement (and the member it reads)
// is not visible here - confirm against the original header.
868 xml_node
<Ch
> *last_child() const
870 assert(m_first_child
); // Cannot query for last child if node has no children
874 //! Finds last child of node with given name. Behaviour is undefined if node has no children.
875 //! Use first_child() to test if node has children.
876 //! \param name Name of child to find, must be zero-terminated
877 //! \return Pointer to found child of node, or 0 if not found.
878 xml_node
<Ch
> *last_child(const Ch
*name
) const
881 return first_child(name
, internal::measure(name
));
884 //! Finds last child of node with given name. Behaviour is undefined if node has no children.
885 //! Use first_child() to test if node has children.
886 //! \param name Name of child to find, doesn't have to be zero-terminated
887 //! \param name_size Size of name, in characters
888 //! \return Pointer to found child of node, or 0 if not found.
889 xml_node
<Ch
> *last_child(const Ch
*name
, std::size_t name_size
) const
892 for (xml_node
<Ch
> *child
= last_child(); child
; child
= child
->prev_sibling())
893 if (internal::compare(child
->name(), child
->name_size(), name
, name_size
))
898 //! Gets previous sibling of node. Behaviour is undefined if node has no parent.
899 //! Use parent() to test if node has a parent.
900 //! \return Pointer to previous sibling of node, or 0 if node has no previous sibling.
901 xml_node
<Ch
> *previous_sibling() const
903 assert(this->m_parent
); // Cannot query for siblings if node has no parent
904 return m_prev_sibling
;
907 //! Finds previous sibling of node with given name. Behaviour is undefined if node has no parent.
908 //! Use parent() to test if node has a parent.
909 //! \param name Name of sibling to find, must be zero-terminated
910 //! \return Pointer to found sibling of node, or 0 if not found.
911 xml_node
<Ch
> *previous_sibling(const Ch
*name
) const
914 return previous_sibling(name
, internal::measure(name
));
917 //! Finds previous sibling of node with given name. Behaviour is undefined if node has no parent.
918 //! Use parent() to test if node has a parent.
919 //! \param name Name of sibling to find, doesn't have to be zero-terminated
920 //! \param name_size Size of name, in characters
921 //! \return Pointer to found sibling of node, or 0 if not found.
922 xml_node
<Ch
> *previous_sibling(const Ch
*name
, std::size_t name_size
) const
925 for (xml_node
<Ch
> *sibling
= previous_sibling(); sibling
; sibling
= sibling
->previous_sibling())
926 if (internal::compare(sibling
->name(), sibling
->name_size(), name
, name_size
))
931 //! Gets next sibling of node. Behaviour is undefined if node has no parent.
932 //! Use parent() to test if node has a parent.
933 //! \return Pointer to next sibling of node, or 0 if node has no next sibling.
934 xml_node
<Ch
> *next_sibling() const
936 assert(this->m_parent
); // Cannot query for siblings if node has no parent
937 return m_next_sibling
;
940 //! Finds next sibling of node with given name. Behaviour is undefined if node has no parent.
941 //! Use parent() to test if node has a parent.
942 //! \param name Name of sibling to find, must be zero-terminated.
943 //! \return Pointer to found sibling of node, or 0 if not found.
944 xml_node
<Ch
> *next_sibling(const Ch
*name
) const
947 return next_sibling(name
, internal::measure(name
));
950 //! Finds next sibling of node with given name. Behaviour is undefined if node has no parent.
951 //! Use parent() to test if node has a parent.
952 //! \param name Name of sibling to find, doesn't have to be zero-terminated.
953 //! \param name_size Size of name, in characters.
954 //! \return Pointer to found sibling of node, or 0 if not found.
955 xml_node
<Ch
> *next_sibling(const Ch
*name
, std::size_t name_size
) const
958 for (xml_node
<Ch
> *sibling
= next_sibling(); sibling
; sibling
= sibling
->next_sibling())
959 if (internal::compare(sibling
->name(), sibling
->name_size(), name
, name_size
))
964 //! Gets first attribute of node.
965 //! \return Pointer to first attribute of node, or 0 if node has no attributes.
966 xml_attribute
<Ch
> *first_attribute() const
968 return m_first_attribute
;
971 //! Finds first attribute of node with given name.
972 //! \param name Name of attribute to find, must be zero-terminated.
973 //! \return Pointer to found attribute, or 0 if not found.
974 xml_attribute
<Ch
> *first_attribute(const Ch
*name
) const
977 return first_attribute(name
, internal::measure(name
));
980 //! Finds first attribute of node with given name
981 //! \param name Name of attribute to find, doesn't have to be zero-terminated.
982 //! \param name_size Size of name, in characters.
983 //! \return Pointer to found attribute, or 0 if not found.
984 xml_attribute
<Ch
> *first_attribute(const Ch
*name
, std::size_t name_size
) const
987 for (xml_attribute
<Ch
> *attribute
= first_attribute(); attribute
; attribute
= attribute
->next_attribute())
988 if (internal::compare(attribute
->name(), attribute
->name_size(), name
, name_size
))
993 //! Gets last attribute of node.
994 //! \return Pointer to last attribute of node, or 0 if node has no attributes.
995 xml_attribute
<Ch
> *last_attribute() const
997 return m_first_attribute
? m_last_attribute
: 0;
1000 //! Finds last attribute of node with given name.
1001 //! \param name Name of attribute to find, must be zero-terminated.
1002 //! \return Pointer to found attribute, or 0 if not found.
1003 xml_attribute
<Ch
> *last_attribute(const Ch
*name
) const
1006 return last_attribute(name
, internal::measure(name
));
1009 //! Finds last attribute of node with given name.
1010 //! \param name Name of attribute to find, doesn't have to be zero-terminated.
1011 //! \param name_size Size of name, in characters.
1012 //! \return Pointer to found attribute, or 0 if not found.
1013 xml_attribute
<Ch
> *last_attribute(const Ch
*name
, std::size_t name_size
) const
1016 for (xml_attribute
<Ch
> *attribute
= last_attribute(); attribute
; attribute
= attribute
->previous_attribute())
1017 if (internal::compare(attribute
->name(), attribute
->name_size(), name
, name_size
))
1022 ///////////////////////////////////////////////////////////////////////////
1023 // Node modification
1025 //! Sets type of node.
1026 //! \param type Type of node to set.
1027 void type(node_type type
)
1032 ///////////////////////////////////////////////////////////////////////////
1033 // Node manipulation
1035 //! Prepends a new child to the node.
1036 //! The prepended child becomes the first child, and all existing children are moved one position back.
1037 //! \param child Node to prepend.
1038 void prepend_child(xml_node
<Ch
> *child
)
1040 assert(child
&& !child
->parent() && child
->type() != node_document
);
1043 child
->m_next_sibling
= m_first_child
;
1044 m_first_child
->m_prev_sibling
= child
;
1048 child
->m_next_sibling
= 0;
1049 m_last_child
= child
;
1051 m_first_child
= child
;
1052 child
->m_parent
= this;
1053 child
->m_prev_sibling
= 0;
1056 //! Appends a new child to the node.
1057 //! The appended child becomes the last child.
1058 //! \param child Node to append.
1059 void append_child(xml_node
<Ch
> *child
)
1061 assert(child
&& !child
->parent() && child
->type() != node_document
);
1064 child
->m_prev_sibling
= m_last_child
;
1065 m_last_child
->m_next_sibling
= child
;
1069 child
->m_prev_sibling
= 0;
1070 m_first_child
= child
;
1072 m_last_child
= child
;
1073 child
->m_parent
= this;
1074 child
->m_next_sibling
= 0;
1077 //! Inserts a new child at specified place inside the node.
1078 //! All children after and including the specified node are moved one position back.
1079 //! \param where Place where to insert the child, or 0 to insert at the back.
1080 //! \param child Node to insert.
1081 void insert_child(xml_node
<Ch
> *where
, xml_node
<Ch
> *child
)
1083 assert(!where
|| where
->parent() == this);
1084 assert(child
&& !child
->parent() && child
->type() != node_document
);
1085 if (where
== m_first_child
)
1086 prepend_child(child
);
1087 else if (where
== 0)
1088 append_child(child
);
1091 child
->m_prev_sibling
= where
->m_prev_sibling
;
1092 child
->m_next_sibling
= where
;
1093 where
->m_prev_sibling
->m_next_sibling
= child
;
1094 where
->m_prev_sibling
= child
;
1095 child
->m_parent
= this;
1099 //! Removes first child from the node.
1100 //! If node has no children, behaviour is undefined.
1101 //! Use first_child() to test if node has children.
1102 void remove_first_child()
1104 assert(first_child());
1105 xml_node
<Ch
> *child
= m_first_child
;
1106 m_first_child
= child
->m_next_sibling
;
1107 if (child
->m_next_sibling
)
1108 child
->m_next_sibling
->m_prev_sibling
= 0;
1111 child
->m_parent
= 0;
1114 //! Removes last child of the node.
1115 //! If node has no children, behaviour is undefined.
1116 //! Use first_child() to test if node has children.
1117 void remove_last_child()
1119 assert(first_child());
1120 xml_node
<Ch
> *child
= m_last_child
;
1121 if (child
->m_prev_sibling
)
1123 m_last_child
= child
->m_prev_sibling
;
1124 child
->m_prev_sibling
->m_next_sibling
= 0;
1128 child
->m_parent
= 0;
1131 //! Removes specified child from the node
1132 // \param where Pointer to child to be removed.
1133 void remove_child(xml_node
<Ch
> *where
)
1135 assert(where
&& where
->parent() == this);
1136 assert(first_child());
1137 if (where
== m_first_child
)
1138 remove_first_child();
1139 else if (where
== m_last_child
)
1140 remove_last_child();
1143 where
->m_prev_sibling
->m_next_sibling
= where
->m_next_sibling
;
1144 where
->m_next_sibling
->m_prev_sibling
= where
->m_prev_sibling
;
1145 where
->m_parent
= 0;
1149 //! Removes all children of node (but not attributes).
1150 void remove_all_children()
1152 for (xml_node
<Ch
> *node
= first_child(); node
; node
= node
->m_next_sibling
)
1157 //! Prepends a new attribute to the node.
1158 //! \param attribute Attribute to prepend.
1159 void prepend_attribute(xml_attribute
<Ch
> *attribute
)
1161 assert(attribute
&& !attribute
->parent());
1162 if (first_attribute())
1164 attribute
->m_next_attribute
= m_first_attribute
;
1165 m_first_attribute
->m_prev_attribute
= attribute
;
1169 attribute
->m_next_attribute
= 0;
1170 m_last_attribute
= attribute
;
1172 m_first_attribute
= attribute
;
1173 attribute
->m_parent
= this;
1174 attribute
->m_prev_attribute
= 0;
1177 //! Appends a new attribute to the node.
1178 //! \param attribute Attribute to append.
1179 void append_attribute(xml_attribute
<Ch
> *attribute
)
1181 assert(attribute
&& !attribute
->parent());
1182 if (first_attribute())
1184 attribute
->m_prev_attribute
= m_last_attribute
;
1185 m_last_attribute
->m_next_attribute
= attribute
;
1189 attribute
->m_prev_attribute
= 0;
1190 m_first_attribute
= attribute
;
1192 m_last_attribute
= attribute
;
1193 attribute
->m_parent
= this;
1194 attribute
->m_next_attribute
= 0;
1197 //! Inserts a new attribute at specified place inside the node.
1198 //! All attributes after and including the specified attribute are moved one position back.
1199 //! \param where Place where to insert the attribute, or 0 to insert at the back.
1200 //! \param attribute Attribute to insert.
1201 void insert_attribute(xml_attribute
<Ch
> *where
, xml_attribute
<Ch
> *attribute
)
1203 assert(!where
|| where
->parent() == this);
1204 assert(attribute
&& !attribute
->parent());
1205 if (where
== m_first_attribute
)
1206 prepend_attribute(attribute
);
1207 else if (where
== 0)
1208 append_attribute(attribute
);
1211 attribute
->m_prev_attribute
= where
->m_prev_attribute
;
1212 attribute
->m_next_attribute
= where
;
1213 where
->m_prev_attribute
->m_next_attribute
= attribute
;
1214 where
->m_prev_attribute
= attribute
;
1215 attribute
->m_parent
= this;
1219 //! Removes first attribute of the node.
1220 //! If node has no attributes, behaviour is undefined.
1221 //! Use first_attribute() to test if node has attributes.
1222 void remove_first_attribute()
1224 assert(first_attribute());
1225 xml_attribute
<Ch
> *attribute
= m_first_attribute
;
1226 if (attribute
->m_next_attribute
)
1228 attribute
->m_next_attribute
->m_prev_attribute
= 0;
1231 m_last_attribute
= 0;
1232 attribute
->m_parent
= 0;
1233 m_first_attribute
= attribute
->m_next_attribute
;
1236 //! Removes last attribute of the node.
1237 //! If node has no attributes, behaviour is undefined.
1238 //! Use first_attribute() to test if node has attributes.
1239 void remove_last_attribute()
1241 assert(first_attribute());
1242 xml_attribute
<Ch
> *attribute
= m_last_attribute
;
1243 if (attribute
->m_prev_attribute
)
1245 attribute
->m_prev_attribute
->m_next_attribute
= 0;
1246 m_last_attribute
= attribute
->m_prev_attribute
;
1249 m_first_attribute
= 0;
1250 attribute
->m_parent
= 0;
1253 //! Removes specified attribute from node.
1254 //! \param where Pointer to attribute to be removed.
1255 void remove_attribute(xml_attribute
<Ch
> *where
)
1257 assert(first_attribute() && where
->parent() == this);
1258 if (where
== m_first_attribute
)
1259 remove_first_attribute();
1260 else if (where
== m_last_attribute
)
1261 remove_last_attribute();
1264 where
->m_prev_attribute
->m_next_attribute
= where
->m_next_attribute
;
1265 where
->m_next_attribute
->m_prev_attribute
= where
->m_prev_attribute
;
1266 where
->m_parent
= 0;
1270 //! Removes all attributes of node.
1271 void remove_all_attributes()
1273 for (xml_attribute
<Ch
> *attribute
= first_attribute(); attribute
; attribute
= attribute
->m_next_attribute
)
1274 attribute
->m_parent
= 0;
1275 m_first_attribute
= 0;
1280 ///////////////////////////////////////////////////////////////////////////
1284 xml_node(const xml_node
&);
1285 void operator =(const xml_node
&);
1287 ///////////////////////////////////////////////////////////////////////////
1290 // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0.
1291 // This is required for maximum performance, as it allows the parser to omit initialization of
1292 // unneeded/redundant values.
1294 // The rules are as follows:
1295 // 1. prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage
1296 // 2. last_child and last_attribute are valid only if node has one or more children/attributes respectively, otherwise they contain garbage
1297 // 3. Remaining pointers are always valid
1299 node_type m_type
; // Type of node; always valid
1300 xml_node
<Ch
> *m_first_child
; // Pointer to first child of node, or 0 if none; always valid
1301 xml_node
<Ch
> *m_last_child
; // Pointer to last child of node, or 0 if none; this value is only valid if m_first_child is non-zero
1302 xml_attribute
<Ch
> *m_first_attribute
; // Pointer to first attribute of node, or 0 if none; always valid
1303 xml_attribute
<Ch
> *m_last_attribute
; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero
1304 xml_node
<Ch
> *m_prev_sibling
; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero
1305 xml_node
<Ch
> *m_next_sibling
; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero
1309 ///////////////////////////////////////////////////////////////////////////
1312 //! This class represents a root of the DOM hierarchy.
1313 //! It is also an xml_node and a memory_pool through public inheritance.
1314 //! Use parse() function to build a DOM tree from a zero-terminated XML text string.
1315 //! parse() function allocates memory for nodes and attributes by using functions of xml_document,
1316 //! which are inherited from memory_pool.
1317 //! To access root node of the document, use the document itself, as if it was an xml_node.
1318 //! \param Ch Character type to use.
1319 //! \param StaticBlockSize Size of static block to use for allocating nodes; no dynamic memory allocations occur until this block is exhausted.
1320 template<class Ch
, int StaticBlockSize
= 128 * 1024>
1321 class xml_document
: public xml_node
<Ch
>, public memory_pool
<Ch
, StaticBlockSize
>
1326 //! Constructs empty XML document
1328 : xml_node
<Ch
>(node_document
)
1332 //! Parses zero-terminated XML string according to given flags.
1333 //! Passed string will be modified by the parser, unless parse_non_destructive flag is used.
1334 //! The string must persist for the lifetime of the document.
1335 //! In case of error, parse_error exception will be thrown.
1337 //! If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning.
1338 //! Make sure that data is zero-terminated.
1339 //! \param text XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser.
1341 void parse(Ch
*text
)
1348 // Parse BOM, if any
1349 parse_bom
<Flags
>(text
);
1354 // Skip whitespace before node
1355 skip
<whitespace_pred
, Flags
>(text
);
1359 // Parse and append new child
1360 if (*text
== Ch('<'))
1363 if (xml_node
<Ch
> *node
= parse_node
<Flags
>(text
))
1364 this->append_child(node
);
1367 RAPIDXML_PARSE_ERROR("expected <", text
);
1372 //! Clears the document by deleting all nodes and clearing the memory pool.
1373 //! All nodes owned by document pool are destroyed.
1376 this->remove_all_children();
1377 this->remove_all_attributes();
1378 memory_pool
<Ch
, StaticBlockSize
>::clear();
1383 ///////////////////////////////////////////////////////////////////////
1384 // Internal character utility functions
1386 // Detect whitespace character
1387 struct whitespace_pred
1389 static unsigned char test(Ch ch
)
1391 return internal::lookup_tables
<0>::lookup_whitespace
[static_cast<unsigned char>(ch
)];
1395 // Detect node name character
1396 struct node_name_pred
1398 static unsigned char test(Ch ch
)
1400 return internal::lookup_tables
<0>::lookup_node_name
[static_cast<unsigned char>(ch
)];
1404 // Detect attribute name character
1405 struct attribute_name_pred
1407 static unsigned char test(Ch ch
)
1409 return internal::lookup_tables
<0>::lookup_attribute_name
[static_cast<unsigned char>(ch
)];
1413 // Detect text character (PCDATA)
1416 static unsigned char test(Ch ch
)
1418 return internal::lookup_tables
<0>::lookup_text
[static_cast<unsigned char>(ch
)];
1422 // Detect text character (PCDATA) that does not require processing
1423 struct text_pure_no_ws_pred
1425 static unsigned char test(Ch ch
)
1427 return internal::lookup_tables
<0>::lookup_text_pure_no_ws
[static_cast<unsigned char>(ch
)];
1431 // Detect text character (PCDATA) that does not require processing
1432 struct text_pure_with_ws_pred
1434 static unsigned char test(Ch ch
)
1436 return internal::lookup_tables
<0>::lookup_text_pure_with_ws
[static_cast<unsigned char>(ch
)];
1440 // Detect attribute value character
1442 struct attribute_value_pred
1444 static unsigned char test(Ch ch
)
1446 if (Quote
== Ch('\''))
1447 return internal::lookup_tables
<0>::lookup_attribute_data_1
[static_cast<unsigned char>(ch
)];
1448 if (Quote
== Ch('\"'))
1449 return internal::lookup_tables
<0>::lookup_attribute_data_2
[static_cast<unsigned char>(ch
)];
1453 // Detect attribute value character
1455 struct attribute_value_pure_pred
1457 static unsigned char test(Ch ch
)
1459 if (Quote
== Ch('\''))
1460 return internal::lookup_tables
<0>::lookup_attribute_data_1_pure
[static_cast<unsigned char>(ch
)];
1461 if (Quote
== Ch('\"'))
1462 return internal::lookup_tables
<0>::lookup_attribute_data_2_pure
[static_cast<unsigned char>(ch
)];
1466 // Insert coded character, using UTF8 or 8-bit ASCII
1468 static void insert_coded_character(Ch
*&text
, unsigned long code
)
1470 if (Flags
& parse_no_utf8
)
1472 // Insert 8-bit ASCII character
1473 // Todo: possibly verify that code is less than 256 and use replacement char otherwise?
1474 text
[0] = static_cast<unsigned char>(code
);
1479 // Insert UTF8 sequence
1480 if (code
< 0x80) // 1 byte sequence
1482 text
[0] = static_cast<unsigned char>(code
);
1485 else if (code
< 0x800) // 2 byte sequence
1487 text
[1] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1488 text
[0] = static_cast<unsigned char>(code
| 0xC0);
1491 else if (code
< 0x10000) // 3 byte sequence
1493 text
[2] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1494 text
[1] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1495 text
[0] = static_cast<unsigned char>(code
| 0xE0);
1498 else if (code
< 0x110000) // 4 byte sequence
1500 text
[3] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1501 text
[2] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1502 text
[1] = static_cast<unsigned char>((code
| 0x80) & 0xBF); code
>>= 6;
1503 text
[0] = static_cast<unsigned char>(code
| 0xF0);
1506 else // Invalid, only codes up to 0x10FFFF are allowed in Unicode
1508 RAPIDXML_PARSE_ERROR("invalid numeric character entity", text
);
1513 // Skip characters until predicate evaluates to true
1514 template<class StopPred
, int Flags
>
1515 static void skip(Ch
*&text
)
1518 while (StopPred::test(*tmp
))
1523 // Skip characters until predicate evaluates to true while doing the following:
1524 // - replacing XML character entity references with proper characters (' & " < > &#...;)
1525 // - condensing whitespace sequences to single space character
1526 template<class StopPred
, class StopPredPure
, int Flags
>
1527 static Ch
*skip_and_expand_character_refs(Ch
*&text
)
1529 // If entity translation and whitespace condense is disabled, use plain skip
1530 if (Flags
& parse_no_entity_translation
&&
1531 !(Flags
& parse_normalize_whitespace
))
1533 skip
<StopPred
, Flags
>(text
);
1537 // Use simple skip until first modification is detected
1538 skip
<StopPredPure
, Flags
>(text
);
1540 // Use translation skip
1543 while (StopPred::test(*src
))
1545 // If entity translation is enabled
1546 if (!(Flags
& parse_no_entity_translation
))
1548 // Test if replacement is needed
1549 if (src
[0] == Ch('&'))
1556 if (src
[2] == Ch('m') && src
[3] == Ch('p') && src
[4] == Ch(';'))
1563 if (src
[2] == Ch('p') && src
[3] == Ch('o') && src
[4] == Ch('s') && src
[5] == Ch(';'))
1574 if (src
[2] == Ch('u') && src
[3] == Ch('o') && src
[4] == Ch('t') && src
[5] == Ch(';'))
1585 if (src
[2] == Ch('t') && src
[3] == Ch(';'))
1596 if (src
[2] == Ch('t') && src
[3] == Ch(';'))
1605 // &#...; - assumes ASCII
1607 if (src
[2] == Ch('x'))
1609 unsigned long code
= 0;
1610 src
+= 3; // Skip &#x
1613 unsigned char digit
= internal::lookup_tables
<0>::lookup_digits
[static_cast<unsigned char>(*src
)];
1616 code
= code
* 16 + digit
;
1619 insert_coded_character
<Flags
>(dest
, code
); // Put character in output
1623 unsigned long code
= 0;
1624 src
+= 2; // Skip &#
1627 unsigned char digit
= internal::lookup_tables
<0>::lookup_digits
[static_cast<unsigned char>(*src
)];
1630 code
= code
* 10 + digit
;
1633 insert_coded_character
<Flags
>(dest
, code
); // Put character in output
1635 if (*src
== Ch(';'))
1638 RAPIDXML_PARSE_ERROR("expected ;", src
);
1643 // Ignore, just copy '&' verbatim
1650 // If whitespace condensing is enabled
1651 if (Flags
& parse_normalize_whitespace
)
1653 // Test if condensing is needed
1654 if (whitespace_pred::test(*src
))
1656 *dest
= Ch(' '); ++dest
; // Put single space in dest
1657 ++src
; // Skip first whitespace char
1658 // Skip remaining whitespace chars
1659 while (whitespace_pred::test(*src
))
1665 // No replacement, only copy character
1676 ///////////////////////////////////////////////////////////////////////
1677 // Internal parsing functions
1679 // Parse BOM, if any
1681 void parse_bom(Ch
*&text
)
1684 if (static_cast<unsigned char>(text
[0]) == 0xEF &&
1685 static_cast<unsigned char>(text
[1]) == 0xBB &&
1686 static_cast<unsigned char>(text
[2]) == 0xBF)
1688 text
+= 3; // Skup utf-8 bom
1692 // Parse XML declaration (<?xml...)
1694 xml_node
<Ch
> *parse_xml_declaration(Ch
*&text
)
1696 // If parsing of declaration is disabled
1697 if (!(Flags
& parse_declaration_node
))
1699 // Skip until end of declaration
1700 while (text
[0] != Ch('?') || text
[1] != Ch('>'))
1703 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1706 text
+= 2; // Skip '?>'
1710 // Create declaration
1711 xml_node
<Ch
> *declaration
= this->allocate_node(node_declaration
);
1713 // Skip whitespace before attributes or ?>
1714 skip
<whitespace_pred
, Flags
>(text
);
1716 // Parse declaration attributes
1717 parse_node_attributes
<Flags
>(text
, declaration
);
1720 if (text
[0] != Ch('?') || text
[1] != Ch('>'))
1721 RAPIDXML_PARSE_ERROR("expected ?>", text
);
1727 // Parse XML comment (<!--...)
1729 xml_node
<Ch
> *parse_comment(Ch
*&text
)
1731 // If parsing of comments is disabled
1732 if (!(Flags
& parse_comment_nodes
))
1734 // Skip until end of comment
1735 while (text
[0] != Ch('-') || text
[1] != Ch('-') || text
[2] != Ch('>'))
1738 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1741 text
+= 3; // Skip '-->'
1742 return 0; // Do not produce comment node
1745 // Remember value start
1748 // Skip until end of comment
1749 while (text
[0] != Ch('-') || text
[1] != Ch('-') || text
[2] != Ch('>'))
1752 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1756 // Create comment node
1757 xml_node
<Ch
> *comment
= this->allocate_node(node_comment
);
1758 comment
->value(value
, text
- value
);
1760 // Place zero terminator after comment value
1761 if (!(Flags
& parse_no_string_terminators
))
1764 text
+= 3; // Skip '-->'
1770 xml_node
<Ch
> *parse_doctype(Ch
*&text
)
1772 // Remember value start
1776 while (*text
!= Ch('>'))
1778 // Determine character type
1782 // If '[' encountered, scan for matching ending ']' using naive algorithm with depth
1783 // This works for all W3C test files except for 2 most wicked
1792 case Ch('['): ++depth
; break;
1793 case Ch(']'): --depth
; break;
1794 case 0: RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1801 // Error on end of text
1803 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1805 // Other character, skip it
1812 // If DOCTYPE nodes enabled
1813 if (Flags
& parse_doctype_node
)
1815 // Create a new doctype node
1816 xml_node
<Ch
> *doctype
= this->allocate_node(node_doctype
);
1817 doctype
->value(value
, text
- value
);
1819 // Place zero terminator after value
1820 if (!(Flags
& parse_no_string_terminators
))
1823 text
+= 1; // skip '>'
1828 text
+= 1; // skip '>'
1836 xml_node
<Ch
> *parse_pi(Ch
*&text
)
1838 // If creation of PI nodes is enabled
1839 if (Flags
& parse_pi_nodes
)
1842 xml_node
<Ch
> *pi
= this->allocate_node(node_pi
);
1844 // Extract PI target name
1846 skip
<node_name_pred
, Flags
>(text
);
1848 RAPIDXML_PARSE_ERROR("expected PI target", text
);
1849 pi
->name(name
, text
- name
);
1851 // Skip whitespace between pi target and pi
1852 skip
<whitespace_pred
, Flags
>(text
);
1854 // Remember start of pi
1858 while (text
[0] != Ch('?') || text
[1] != Ch('>'))
1860 if (*text
== Ch('\0'))
1861 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1865 // Set pi value (verbatim, no entity expansion or whitespace normalization)
1866 pi
->value(value
, text
- value
);
1868 // Place zero terminator after name and value
1869 if (!(Flags
& parse_no_string_terminators
))
1871 pi
->name()[pi
->name_size()] = Ch('\0');
1872 pi
->value()[pi
->value_size()] = Ch('\0');
1875 text
+= 2; // Skip '?>'
1881 while (text
[0] != Ch('?') || text
[1] != Ch('>'))
1883 if (*text
== Ch('\0'))
1884 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1887 text
+= 2; // Skip '?>'
1892 // Parse and append data
1893 // Return character that ends data.
1894 // This is necessary because this character might have been overwritten by a terminating 0
1896 Ch
parse_and_append_data(xml_node
<Ch
> *node
, Ch
*&text
, Ch
*contents_start
)
1899 if (Flags
& parse_normalize_whitespace
)
1902 end
= skip_and_expand_character_refs
<text_pred
, text_pure_with_ws_pred
, Flags
>(text
); // Skip until end of data
1906 value
= contents_start
; // Back up to to start of whitespace
1907 end
= skip_and_expand_character_refs
<text_pred
, text_pure_no_ws_pred
, Flags
>(text
); // Skip until end of data
1910 // If data present at all
1913 // Trim trailing whitespace, leading was already trimmed by whitespace skip after >
1914 // It is already condensed to single space characters by skipping function, so just trim 1 char off the end
1915 if ((Flags
& parse_normalize_whitespace
) && *(end
- 1) == Ch(' '))
1918 // If characters are still left between end and value (this test is only necessary if normalization is enabled)
1919 if (!(Flags
& parse_normalize_whitespace
) || end
> value
)
1921 // Create new data node
1922 if (!(Flags
& parse_no_data_nodes
))
1924 xml_node
<Ch
> *data
= this->allocate_node(node_data
);
1925 data
->value(value
, end
- value
);
1926 node
->append_child(data
);
1929 // Add data to parent node if no data exists yet
1930 if (!(Flags
& parse_no_element_values
))
1931 if (*node
->value() == Ch('\0'))
1932 node
->value(value
, end
- value
);
1934 // Place zero terminator after value
1935 if (!(Flags
& parse_no_string_terminators
))
1939 return ch
; // Return character that ends data
1944 // Return character that ends data
1950 xml_node
<Ch
> *parse_cdata(Ch
*&text
)
1952 // If CDATA is disabled
1953 if (Flags
& parse_no_data_nodes
)
1955 // Skip until end of cdata
1956 while (text
[0] != Ch(']') || text
[1] != Ch(']') || text
[2] != Ch('>'))
1959 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1962 text
+= 3; // Skip ]]>
1963 return 0; // Do not produce CDATA node
1966 // Skip until end of cdata
1968 while (text
[0] != Ch(']') || text
[1] != Ch(']') || text
[2] != Ch('>'))
1971 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
1975 // Create new cdata node
1976 xml_node
<Ch
> *cdata
= this->allocate_node(node_cdata
);
1977 cdata
->value(value
, text
- value
);
1979 // Place zero terminator after value
1980 if (!(Flags
& parse_no_string_terminators
))
1983 text
+= 3; // Skip ]]>
1987 // Parse element node
1989 xml_node
<Ch
> *parse_element(Ch
*&text
)
1991 // Create element node
1992 xml_node
<Ch
> *element
= this->allocate_node(node_element
);
1994 // Extract element name
1996 skip
<node_name_pred
, Flags
>(text
);
1998 RAPIDXML_PARSE_ERROR("expected element name", text
);
1999 element
->name(name
, text
- name
);
2001 // Skip whitespace between element name and attributes or >
2002 skip
<whitespace_pred
, Flags
>(text
);
2004 // Parse attributes, if any
2005 parse_node_attributes
<Flags
>(text
, element
);
2007 // Determine ending type
2008 if (*text
== Ch('>'))
2011 parse_node_contents
<Flags
>(text
, element
);
2013 else if (*text
== Ch('/'))
2016 if (*text
!= Ch('>'))
2017 RAPIDXML_PARSE_ERROR("expected >", text
);
2021 RAPIDXML_PARSE_ERROR("expected >", text
);
2023 // Place zero terminator after name
2024 if (!(Flags
& parse_no_string_terminators
))
2025 element
->name()[element
->name_size()] = Ch('\0');
2027 // Return parsed element
2031 // Determine node type, and parse it
2033 xml_node
<Ch
> *parse_node(Ch
*&text
)
2035 // Parse proper node type
2041 // Parse and append element node
2042 return parse_element
<Flags
>(text
);
2047 if ((text
[0] == Ch('x') || text
[0] == Ch('X')) &&
2048 (text
[1] == Ch('m') || text
[1] == Ch('M')) &&
2049 (text
[2] == Ch('l') || text
[2] == Ch('L')) &&
2050 whitespace_pred::test(text
[3]))
2052 // '<?xml ' - xml declaration
2053 text
+= 4; // Skip 'xml '
2054 return parse_xml_declaration
<Flags
>(text
);
2059 return parse_pi
<Flags
>(text
);
2065 // Parse proper subset of <! node
2071 if (text
[2] == Ch('-'))
2073 // '<!--' - xml comment
2074 text
+= 3; // Skip '!--'
2075 return parse_comment
<Flags
>(text
);
2081 if (text
[2] == Ch('C') && text
[3] == Ch('D') && text
[4] == Ch('A') &&
2082 text
[5] == Ch('T') && text
[6] == Ch('A') && text
[7] == Ch('['))
2084 // '<![CDATA[' - cdata
2085 text
+= 8; // Skip '![CDATA['
2086 return parse_cdata
<Flags
>(text
);
2092 if (text
[2] == Ch('O') && text
[3] == Ch('C') && text
[4] == Ch('T') &&
2093 text
[5] == Ch('Y') && text
[6] == Ch('P') && text
[7] == Ch('E') &&
2094 whitespace_pred::test(text
[8]))
2096 // '<!DOCTYPE ' - doctype
2097 text
+= 9; // skip '!DOCTYPE '
2098 return parse_doctype
<Flags
>(text
);
2103 // Attempt to skip other, unrecognized node types starting with <!
2105 while (*text
!= Ch('>'))
2108 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
2112 return 0; // No node recognized
2117 // Parse contents of the node - children, data etc.
2119 void parse_node_contents(Ch
*&text
, xml_node
<Ch
> *node
)
2121 // For all children and text
2124 // Skip whitespace between > and node contents
2125 Ch
*contents_start
= text
;
2126 skip
<whitespace_pred
, Flags
>(text
);
2127 Ch next_char
= *text
;
2129 // After data nodes, instead of continuing the loop, control jumps here.
2130 // This is because zero termination inside parse_and_append_data() function
2131 // would wreak havoc with the above code.
2132 // Also, skipping whitespace after data nodes is unnecessary.
2135 // Determine what comes next: node closing, child node, data node, or 0?
2139 // Node closing or child node
2141 if (text
[1] == Ch('/'))
2144 text
+= 2; // Skip '</'
2145 if (Flags
& parse_validate_closing_tags
)
2147 // Skip and validate closing tag name
2148 Ch
*closing_name
= text
;
2149 skip
<node_name_pred
, Flags
>(text
);
2150 if (!internal::compare(node
->name(), node
->name_size(), closing_name
, text
- closing_name
))
2151 RAPIDXML_PARSE_ERROR("invalid closing tag name", text
);
2155 // No validation, just skip name
2156 skip
<node_name_pred
, Flags
>(text
);
2158 // Skip remaining whitespace after node name
2159 skip
<whitespace_pred
, Flags
>(text
);
2160 if (*text
!= Ch('>'))
2161 RAPIDXML_PARSE_ERROR("expected >", text
);
2163 return; // Node closed, finished parsing contents
2169 if (xml_node
<Ch
> *child
= parse_node
<Flags
>(text
))
2170 node
->append_child(child
);
2174 // End of data - error
2176 RAPIDXML_PARSE_ERROR("unexpected end of data", text
);
2181 next_char
= parse_and_append_data
<Flags
>(node
, text
, contents_start
);
2182 goto after_data_node
; // Bypass regular processing after data nodes
2188 // Parse XML attributes of the node
2190 void parse_node_attributes(Ch
*&text
, xml_node
<Ch
> *node
)
2192 // For all attributes
2193 while (attribute_name_pred::test(*text
))
2195 // Extract attribute name
2197 ++text
; // Skip first character of attribute name
2198 skip
<attribute_name_pred
, Flags
>(text
);
2200 RAPIDXML_PARSE_ERROR("expected attribute name", name
);
2202 // Create new attribute
2203 xml_attribute
<Ch
> *attribute
= this->allocate_attribute();
2204 attribute
->name(name
, text
- name
);
2205 node
->append_attribute(attribute
);
2207 // Skip whitespace after attribute name
2208 skip
<whitespace_pred
, Flags
>(text
);
2211 if (*text
!= Ch('='))
2212 RAPIDXML_PARSE_ERROR("expected =", text
);
2215 // Add terminating zero after name
2216 if (!(Flags
& parse_no_string_terminators
))
2217 attribute
->name()[attribute
->name_size()] = 0;
2219 // Skip whitespace after =
2220 skip
<whitespace_pred
, Flags
>(text
);
2222 // Skip quote and remember if it was ' or "
2224 if (quote
!= Ch('\'') && quote
!= Ch('"'))
2225 RAPIDXML_PARSE_ERROR("expected ' or \"", text
);
2228 // Extract attribute value and expand char refs in it
2229 Ch
*value
= text
, *end
;
2230 const int AttFlags
= Flags
& ~parse_normalize_whitespace
; // No whitespace normalization in attributes
2231 if (quote
== Ch('\''))
2232 end
= skip_and_expand_character_refs
<attribute_value_pred
<Ch('\'')>, attribute_value_pure_pred
<Ch('\'')>, AttFlags
>(text
);
2234 end
= skip_and_expand_character_refs
<attribute_value_pred
<Ch('"')>, attribute_value_pure_pred
<Ch('"')>, AttFlags
>(text
);
2236 // Set attribute value
2237 attribute
->value(value
, end
- value
);
2239 // Make sure that end quote is present
2241 RAPIDXML_PARSE_ERROR("expected ' or \"", text
);
2242 ++text
; // Skip quote
2244 // Add terminating zero after value
2245 if (!(Flags
& parse_no_string_terminators
))
2246 attribute
->value()[attribute
->value_size()] = 0;
2248 // Skip whitespace after attribute value
2249 skip
<whitespace_pred
, Flags
>(text
);
2259 // Whitespace (space \n \r \t)
2261 const unsigned char lookup_tables
<Dummy
>::lookup_whitespace
[256] =
2263 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2264 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0
2265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
2266 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2
2267 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3
2268 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4
2269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5
2270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6
2271 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7
2272 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8
2273 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9
2274 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A
2275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B
2276 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C
2277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D
2278 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E
2279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F
2282 // Node name (anything but space \n \r \t / > ? \0)
2284 const unsigned char lookup_tables
<Dummy
>::lookup_node_name
[256] =
2286 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2287 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0
2288 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2289 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2
2290 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, // 3
2291 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2292 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2293 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2294 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2295 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2296 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2297 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2298 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2299 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2300 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2301 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2302 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2305 // Text (i.e. PCDATA) (anything but < \0)
2307 const unsigned char lookup_tables
<Dummy
>::lookup_text
[256] =
2309 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2310 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2311 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2312 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2313 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3
2314 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2315 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2316 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2317 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2318 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2320 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2321 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2322 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2323 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2325 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2328 // Text (i.e. PCDATA) that does not require processing when ws normalization is disabled
2329 // (anything but < \0 &)
2331 const unsigned char lookup_tables
<Dummy
>::lookup_text_pure_no_ws
[256] =
2333 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2334 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2336 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2337 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3
2338 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2339 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2344 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2345 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2346 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2347 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2348 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2349 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2352 // Text (i.e. PCDATA) that does not require processing when ws normalizationis is enabled
2353 // (anything but < \0 & space \n \r \t)
2355 const unsigned char lookup_tables
<Dummy
>::lookup_text_pure_with_ws
[256] =
2357 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2358 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0
2359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2360 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2361 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3
2362 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2363 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2365 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2366 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2367 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2368 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2369 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2370 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2371 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2372 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2373 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2376 // Attribute name (anything but space \n \r \t / < > = ? ! \0)
2378 const unsigned char lookup_tables
<Dummy
>::lookup_attribute_name
[256] =
2380 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2381 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0
2382 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2383 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2
2384 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // 3
2385 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2386 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2387 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2388 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2389 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2390 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2391 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2392 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2393 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2394 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2395 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2396 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2399 // Attribute data with single quote (anything but ' \0)
2401 const unsigned char lookup_tables
<Dummy
>::lookup_attribute_data_1
[256] =
2403 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2404 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2405 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2406 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2407 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3
2408 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2409 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2410 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2411 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2412 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2413 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2414 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2415 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2416 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2417 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2418 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2419 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2422 // Attribute data with single quote that does not require processing (anything but ' \0 &)
2424 const unsigned char lookup_tables
<Dummy
>::lookup_attribute_data_1_pure
[256] =
2426 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2427 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2428 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2429 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2430 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3
2431 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2432 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2433 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2434 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2435 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2436 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2437 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2438 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2439 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2440 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2441 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2442 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2445 // Attribute data with double quote (anything but " \0)
2447 const unsigned char lookup_tables
<Dummy
>::lookup_attribute_data_2
[256] =
2449 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2450 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2451 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2452 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2453 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3
2454 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2455 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2456 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2457 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2458 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2459 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2460 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2461 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2462 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2463 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2464 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2465 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2468 // Attribute data with double quote that does not require processing (anything but " \0 &)
2470 const unsigned char lookup_tables
<Dummy
>::lookup_attribute_data_2_pure
[256] =
2472 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2473 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
2474 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
2475 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
2476 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3
2477 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
2478 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
2479 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
2480 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
2481 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8
2482 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9
2483 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A
2484 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B
2485 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C
2486 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D
2487 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E
2488 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F
2491 // Digits (dec and hex, 255 denotes end of numeric character reference)
2493 const unsigned char lookup_tables
<Dummy
>::lookup_digits
[256] =
2495 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
2496 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0
2497 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 1
2498 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 2
2499 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,255,255,255,255,255,255, // 3
2500 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 4
2501 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 5
2502 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 6
2503 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 7
2504 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 8
2505 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 9
2506 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // A
2507 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // B
2508 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // C
2509 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // D
2510 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // E
2511 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // F
2519 // Undefine internal macros
2520 #undef RAPIDXML_PARSE_ERROR
2522 // On MSVC, restore warnings state
2524 #pragma warning(pop)