1 ///////////////////////////////////////////////////////////////////////////////
3 // Pug XML Parser - Version 1.0002
4 // --------------------------------------------------------
5 // Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
6 // Released into the Public Domain. Use at your own risk.
7 // See pugxml.xml for further information, history, etc.
8 // Contributions by Neville Franks (readonly@getsoft.com).
10 // Modified to suit boost::property_tree library by Marcin Kalicinski
12 #ifndef BOOST_PROPERTY_TREE_DETAIL_PUGXML_HPP_INCLUDED
13 #define BOOST_PROPERTY_TREE_DETAIL_PUGXML_HPP_INCLUDED
16 #define UNDEF_TCHAR_AND_REST
18 #define _tcslen strlen
19 #define _istalnum isalnum
20 #define _tcsncpy strncpy
21 #define _tcscpy strcpy
22 #define _tcscmp strcmp
23 #define _tcstol strtol
24 #define _tcstod strtod
25 #define _tcstok strtok
26 #define _stprintf sprintf
30 //#define PUGOPT_MEMFIL //Uncomment to enable memory-mapped file parsing support.
31 //#define PUGOPT_NONSEG //Uncomment to enable non-destructive (non-segmenting) parsing support.
34 # ifndef PUGOPT_NONSEG
35 # define PUGOPT_NONSEG //PUGOPT_MEMFIL implies PUGOPT_NONSEG.
43 #if defined(PUGOPT_MEMFIL) | defined(PUGOPT_NONSEG)
48 # define UNDEF_LOHIWORD
49 # define HIWORD(X) ((unsigned short)((unsigned long)(X)>>16))
50 # define LOWORD(X) ((unsigned short)((unsigned long)(X)&0xFFFF))
54 // Library variant ID. The ID 0x58475550 is owned by Kristen Wegner. You *MUST*
55 // provide your own unique ID if you modify or fork the code in this library to
56 // your own purposes. If you change this then *you* are now the maintainer, not me.
57 // Change also in the package section of pugxml.xml, and append yourself to the
60 #define PUGAPI_INTERNAL_VARIANT 0xdeadbeef
61 //<summary>Major version. Increment for each major release. Only change if you own the variant.</summary>
62 #define PUGAPI_INTERNAL_VERSION_MAJOR 1
63 //<summary>Minor version. Increment for each minor release. Only change if you own the variant ID.</summary>
64 #define PUGAPI_INTERNAL_VERSION_MINOR 2
66 #define PUGAPI_INTERNAL_VERSION ((PUGAPI_INTERNAL_VERSION_MINOR&0xFFFF)|PUGAPI_INTERNAL_VERSION_MAJOR<<16)
68 #define PUGDEF_ATTR_NAME_SIZE 128
69 #define PUGDEF_ATTR_VALU_SIZE 256
70 #define PUGDEF_ELEM_NAME_SIZE 256
72 //<summary>The PugXML Parser namespace.</summary>
73 namespace boost
{ namespace property_tree
{ namespace xml_parser
{ namespace pug
76 //<summary>The Library Variant ID. See PUGAPI_INTERNAL_VARIANT for an explanation.</summary>
77 //<returns>The current Library Variant ID.</returns>
78 inline static unsigned long lib_variant(){ return PUGAPI_INTERNAL_VARIANT
; }
79 //<summary>The library version. High word is major version. Low word is minor version.</summary>
80 //<returns>The current Library Version.</returns>
81 inline static unsigned long lib_version(){ return PUGAPI_INTERNAL_VERSION
; }
84 //<summary>A 'name=value' XML attribute structure.</summary>
85 typedef struct t_xml_attribute_struct
87 TCHAR
* name
; //Pointer to attribute name.
88 bool name_insitu
; //True if 'name' is a segment of the original parse string.
90 unsigned int name_size
; //Length of element name.
92 TCHAR
* value
; //Pointer to attribute value.
93 bool value_insitu
; //True if 'value' is a segment of the original parse string.
95 unsigned int value_size
; //Length of element name.
//<summary>Tree node classification.</summary>
//<remarks>Stored in the 'type' member of a tree node. Values are consecutive, starting at 0.</remarks>
enum t_xml_node_type
{
    node_null,          //An undifferentiated entity.
    node_document,      //A document tree's absolute root.
    node_element,       //E.g. '<...>'
    node_pcdata,        //E.g. '>...<'
    node_cdata,         //E.g. '<![CDATA[...]]>'
    node_comment,       //E.g. '<!--...-->'
    node_pi,            //E.g. '<?...?>'
    node_include,       //E.g. '<![INCLUDE[...]]>'
    node_doctype,       //E.g. '<!DOCTYPE ...>'
    node_dtd_entity,    //E.g. '<!ENTITY ...>'
    node_dtd_attlist,   //E.g. '<!ATTLIST ...>'
    node_dtd_element,   //E.g. '<!ELEMENT ...>'
    node_dtd_notation   //E.g. '<!NOTATION ...>'
};
typedef t_xml_node_type xml_node_type;
//Default child element & attribute pointer-space growth increment.
static const unsigned long parse_grow           = 4;

//Parser option flags; combine with bitwise OR and pass as the parse options mask.
static const unsigned long parse_minimal        = 0UL;          //Unset the following flags.
static const unsigned long parse_pi             = 1UL << 1;     //Parse '<?...?>'
static const unsigned long parse_doctype        = 1UL << 2;     //Parse '<!DOCTYPE ...>' section, setting '[...]' as data member.
static const unsigned long parse_comments       = 1UL << 3;     //Parse '<!--...-->'
static const unsigned long parse_cdata          = 1UL << 4;     //Parse '<![CDATA[...]]>', and/or '<![INCLUDE[...]]>'
static const unsigned long parse_escapes        = 1UL << 5;     //Not implemented.
static const unsigned long parse_trim_pcdata    = 1UL << 6;     //Trim '>...<'
static const unsigned long parse_trim_attribute = 1UL << 7;     //Trim 'foo="..."'.
static const unsigned long parse_trim_cdata     = 1UL << 8;     //Trim '<![CDATA[...]]>', and/or '<![INCLUDE[...]]>'
static const unsigned long parse_trim_entity    = 1UL << 9;     //Trim '<!ENTITY name ...>', etc.
static const unsigned long parse_trim_doctype   = 1UL << 10;    //Trim '<!DOCTYPE [...]>'
static const unsigned long parse_trim_comment   = 1UL << 11;    //Trim '<!--...-->'
static const unsigned long parse_wnorm          = 1UL << 12;    //Normalize all entities that are flagged to be trimmed.
static const unsigned long parse_dtd            = 1UL << 13;    //If parse_doctype set, then parse whatever is in data member ('[...]').
static const unsigned long parse_dtd_only       = 1UL << 14;    //If parse_doctype|parse_dtd set, then parse only '<!DOCTYPE [*]>'
static const unsigned long parse_default        = 0xFFFFUL;     //All of the low sixteen option bits.
static const unsigned long parse_noset          = 1UL << 31;    //High bit only (0x80000000).
145 //<summary>An XML document tree node.</summary>
146 typedef struct t_xml_node_struct
148 t_xml_node_struct
* parent
; //Pointer to parent
149 TCHAR
* name
; //Pointer to element name.
151 unsigned int name_size
; //Length of element name. Since 19 Jan 2003 NF.
153 bool name_insitu
; //True if 'name' is a segment of the original parse string.
154 xml_node_type type
; //Node type; see xml_node_type.
155 unsigned int attributes
; //Count attributes.
156 unsigned int attribute_space
; //Available pointer space in 'attribute'.
157 xml_attribute_struct
** attribute
; //Array of pointers to attributes; see xml_attribute_struct.
158 unsigned int children
; //Count children in member 'child'.
159 unsigned int child_space
; //Available pointer space in 'child'.
160 t_xml_node_struct
** child
; //Array of pointers to children.
161 TCHAR
* value
; //Pointer to any associated string data.
163 unsigned int value_size
; //Length of element data. Since 19 Jan 2003 NF.
165 bool value_insitu
; //True if 'data' is a segment of the original parse string.
170 //<summary>Concatenate 'rhs' to 'lhs', growing 'rhs' if neccessary.</summary>
171 //<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
172 //<param name="rhs">Source.</param>
173 //<returns>Success if 'realloc' was successful.</returns>
174 //<remarks>'rhs' is resized and 'rhs' is concatenated to it.</remarks>
175 inline static bool strcatgrow(TCHAR
** lhs
,const TCHAR
* rhs
)
177 if(!*lhs
) //Null, so first allocate.
179 *lhs
= (TCHAR
*) malloc(1UL*sizeof(TCHAR
));
180 **lhs
= 0; //Zero-terminate.
182 size_t ulhs
= _tcslen(*lhs
);
183 size_t urhs
= _tcslen(rhs
);
184 TCHAR
* temp
= (TCHAR
*) realloc(*lhs
,(ulhs
+urhs
+1UL)*sizeof(TCHAR
));
185 if(!temp
) return false; //Realloc failed.
186 memcpy(temp
+ulhs
,rhs
,urhs
*sizeof(TCHAR
)); //Concatenate.
187 temp
[ulhs
+urhs
] = 0; //Terminate it.
193 inline static bool chartype_symbol(TCHAR c
) //Character is alphanumeric, -or- '_', -or- ':', -or- '-', -or- '.'.
194 { return (_istalnum(c
)||c
==_T('_')||c
==_T(':')||c
==_T('-')||c
==_T('.')); }
195 inline static bool chartype_space(TCHAR c
) //Character is greater than 0 or character is less than exclamation.
196 { return (c
>0 && c
<_T('!')); }
197 inline static bool chartype_enter(TCHAR c
) //Character is '<'.
198 { return (c
==_T('<')); }
199 inline static bool chartype_leave(TCHAR c
) //Character is '>'.
200 { return (c
==_T('>')); }
201 inline static bool chartype_close(TCHAR c
) //Character is '/'.
202 { return (c
==_T('/')); }
203 inline static bool chartype_equals(TCHAR c
) //Character is '='.
204 { return (c
==_T('=')); }
205 inline static bool chartype_special(TCHAR c
) //Character is '!'.
206 { return (c
==_T('!')); }
207 inline static bool chartype_pi(TCHAR c
) //Character is '?'.
208 { return (c
==_T('?')); }
209 inline static bool chartype_dash(TCHAR c
) //Character is '-'.
210 { return (c
==_T('-')); }
211 inline static bool chartype_quote(TCHAR c
) //Character is "‘" -or- ‘"‘.
212 { return (c
==_T('"')||c
==_T('\'')); }
213 inline static bool chartype_lbracket(TCHAR c
) //Character is '['.
214 { return (c
==_T('[')); }
215 inline static bool chartype_rbracket(TCHAR c
) //Character is ']'.
216 { return (c
==_T(']')); }
222 //<summary>Concatenate 'rhs' to 'lhs', growing 'lhs' if neccessary.</summary>
223 //<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
224 //<param name="rhs">Source.</param>
225 //<param name="lsize">Specifies the length of *lhs in bytes and returns its new length.</param>
226 //<param name="rsize">Specifies the length of *rhs in bytes.</param>
227 //<returns>Success if 'realloc' was successful.</returns>
228 //<remarks>'lhs' is resized and 'rhs' is concatenated to it.</remarks>
229 inline static bool strcatgrown_impl(TCHAR
** lhs
,const TCHAR
* rhs
,unsigned int& lsize
,unsigned int rsize
)
231 if(!*lhs
) //Null, allocate and copy.
233 *lhs
= (TCHAR
*) malloc(rsize
+sizeof(TCHAR
));
237 return false; //Allocate failed.
239 memcpy(*lhs
,rhs
,rsize
); //Concatenate.
240 *(*lhs
+ rsize
) = 0; //Terminate it.
243 else //Reallocate. NF I don't think this is right for MBCS, nor is code in 'StrCatGrow()'.
245 TCHAR
* temp
= (TCHAR
*) realloc(*lhs
,lsize
+ rsize
+ sizeof(TCHAR
));
246 if(!temp
) return false; //Realloc failed.
247 memcpy(temp
+lsize
,rhs
,rsize
); //Concatenate.
248 lsize
+= rsize
; //Set new length.
249 temp
[lsize
] = 0; //Terminate it.
255 //<summary>Concatenate 'rhs' to 'lhs', growing 'lhs' if neccessary.</summary>
256 //<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
257 //<param name="rhs">Source.</param>
258 //<param name="lsize">Specifies the length of *lhs in bytes and returns its new length.</param>
259 //<returns>Success if 'realloc' was successful.</returns>
260 //<remarks>'lhs' is resized and 'rhs' is concatenated to it.</remarks>
261 inline static bool strcatgrown(TCHAR
** lhs
,const TCHAR
* rhs
,unsigned int& lsize
)
263 const unsigned int rsize
= _tcslen(rhs
) * sizeof(TCHAR
);
264 return pug::strcatgrown_impl(lhs
,rhs
,lsize
,rsize
);
267 //<summary>Trim leading and trailing whitespace.</summary>
268 //<param name="s">Pointer to pointer to string.</param>
269 //<param name="len">Specifies the length of *s in bytes and returns its new length.</param>
270 //<returns>Success.</returns>
271 //<remarks>*s is modified to point to the first non-white character in the string.</remarks>
272 inline static bool strwtrim(TCHAR
** s
,unsigned int& len
)
274 if(!s
|| !*s
) return false;
275 TCHAR
* pse
= *s
+ len
;
276 while(*s
< pse
&& pug::chartype_space(**s
)) //Find first non-white character.
277 ++*s
; //As long as we hit whitespace, increment the string pointer.
278 for(; *s
< --pse
;) //As long as we hit whitespace, decrement.
280 if(!pug::chartype_space(*pse
))
293 //<summary>Trim leading and trailing whitespace.</summary>
294 //<param name="s">Pointer to pointer to string.</param>
295 //<returns>Success.</returns>
296 inline static bool strwtrim(TCHAR
** s
)
298 if(!s
|| !*s
) return false;
299 while(**s
> 0 && **s
< _T('!')) ++*s
; //As long as we hit whitespace, increment the string pointer.
300 const TCHAR
* temp
= *s
;
301 while(0 != *temp
++); //Find the terminating null.
302 long i
, n
= (long)(temp
-*s
-1);
303 --n
; //Start from the last string TCHAR.
304 for(i
=n
; (i
> -1) && (*s
)[i
] > 0 && (*s
)[i
] < _T('!'); --i
); //As long as we hit whitespace, decrement.
305 if(i
<n
) (*s
)[i
+1] = 0; //Zero-terminate.
311 // In situ trim leading and trailing whitespace, then convert all consecutive
312 // whitespace to a single space TCHAR.
314 //<param name="s">Pointer to pointer to string.</param>
315 //<returns>Success.</returns>
316 inline static bool strwnorm(TCHAR
** s
)
318 if(!s
|| !*s
) return false; //No string to normalize.
319 while(**s
> 0 && **s
< _T('!')) ++(*s
); //As long as we hit whitespace, increment the string pointer.
320 const TCHAR
* temp
= *s
;
321 while(0 != *temp
++); //Find the terminating null.
322 long n
= (long)(temp
-*s
-1);
323 TCHAR
* norm
= (TCHAR
*)malloc(sizeof(TCHAR
)*(n
+1)); //Allocate a temporary normalization buffer.
324 if(!norm
) return false; //Allocation failed.
325 memset(norm
,0,sizeof(TCHAR
)*(n
+1)); //Zero it.
329 for(i
=1; i
<n
; ++i
) //For each character, starting at offset 1.
331 if((*s
)[i
] < _T('!')) //Whitespace-like.
333 if((*s
)[i
-1] >= _T('!')) //Previous was not whitespace-like.
335 norm
[j
] = _T(' '); //Convert to a space TCHAR.
336 ++j
; //Normalization buffer grew by one TCHAR.
339 else { norm
[j
] = (*s
)[i
]; ++j
; } //Not whitespace, so just copy over.
341 if(j
< n
) //Normalization buffer is actually different that input.
343 _tcsncpy(*s
,norm
,j
); //So, copy it back to input.
344 (*s
)[j
] = 0; //Zero-terminate.
346 free(norm
); //Don't need this anymore.
347 --n
; //Start from the last string TCHAR.
348 for(i
=n
; (i
> -1) && (*s
)[i
] > 0 && (*s
)[i
] < _T('!'); --i
); //Find the first non-whitespace from the end.
349 if(i
<n
) (*s
)[i
+1] = 0; //Truncate it.
357 //<summary>Set structure string member to given value.</summary>
358 //<param name="dest">Pointer to pointer to destination.</param>
359 //<param name="src">Source.</param>
360 //<param name="insitu">Pointer to boolean in-situ string flag.</param>
361 //<returns>True if member was set to the new value.</returns>
363 // If 'src' is larger than 'dest' then 'dest' is resized, in which case
364 // it is probably no longer in-situ,and 'in_situ' is set to false. If
365 // 'dest' is already no longer in-situ, and 'src' is too small then the
366 // existing memory pointed to is freed. If 'dest' is larger than or equal
367 // to 'dest' then it is merely copied with no resize.
369 inline static bool strcpyinsitu
376 unsigned int& destlen
380 if(!dest
|| !src
|| !insitu
) return false; //Bad argument(s), so fail.
381 #ifndef PUGOPT_NONSEG //Always use heap for our r/o string.
382 size_t l
= (*dest
) ? _tcslen(*dest
) : 0; //How long is destination?
383 if(l
>= _tcslen(src
)) //Destination is large enough, so just copy.
385 _tcscpy(*dest
,src
); //Copy.
386 return true; //Success.
388 else //Destination is too small.
391 if(*dest
&& !*insitu
) free(*dest
); //If destination is not in-situ, then free it.
392 *dest
= NULL
; //Mark destination as NULL, forcing 'StrCatGrow' to 'malloc.
394 if(strcatgrown(dest
,src
,destlen
)) //Allocate & copy source to destination
396 if(strcatgrow(dest
,src
)) //Allocate & copy source to destination
399 *insitu
= false; //Mark as no longer being in-situ, so we can free it later.
400 return true; //Success.
403 return false; //Failure.
407 //<summary>Character set pattern match.</summary>
408 //<param name="lhs">String or expression for left-hand side of comparison.</param>
409 //<param name="rhs">String for right-hand side of comparison.</param>
410 //<remarks>Used by 'strcmpwild'.</remarks>
411 inline int strcmpwild_cset(const TCHAR
** src
,const TCHAR
** dst
)
421 while(**src
!= _T(']') || star
== 1)
425 if(**src
== _T('-') && *(*src
-1) < *(*src
+1) && *(*src
+1) != _T(']') && star
== 0)
427 if(**dst
>= *(*src
-1) && **dst
<= *(*src
+1))
433 else if(**src
== **dst
) find
= 1;
438 if(excl
== 1) find
= (1 - find
);
439 if(find
== 1) ++(*dst
);
444 inline int strcmpwild_impl(const TCHAR
* src
,const TCHAR
* dst
); //Forward declaration.
447 //<summary>Wildcard pattern match.</summary>
448 //<param name="lhs">String or expression for left-hand side of comparison.</param>
449 //<param name="rhs">String for right-hand side of comparison.</param>
450 //<remarks>Used by 'strcmpwild'.</remarks>
451 inline int strcmpwild_astr(const TCHAR
** src
,const TCHAR
** dst
)
455 while((**dst
!= 0 && **src
== _T('?')) || **src
== _T('*'))
457 if(**src
== _T('?')) ++(*dst
);
460 while(**src
== _T('*')) ++(*src
);
461 if(**dst
== 0 && **src
!= 0) return 0;
462 if(**dst
== 0 && **src
== 0) return 1;
465 if(strcmpwild_impl(*src
,*dst
) == 0)
470 while(**src
!= **dst
&& **src
!= _T('[') && **dst
!= 0)
473 while((**dst
!= 0) ? strcmpwild_impl(*src
,*dst
) == 0 : 0 != (find
=0));
475 if(**dst
== 0 && **src
== 0) find
= 1;
481 //<summary>Compare two strings, with globbing, and character sets.</summary>
482 //<param name="lhs">String or expression for left-hand side of comparison.</param>
483 //<param name="rhs">String for right-hand side of comparison.</param>
484 //<remarks>Used by 'strcmpwild'.</remarks>
485 inline int strcmpwild_impl(const TCHAR
* src
,const TCHAR
* dst
)
488 for(; *src
!= 0 && find
== 1 && *dst
!= 0; ++src
)
492 case _T('?'): ++dst
; break;
493 case _T('['): ++src
; find
= strcmpwild_cset(&src
,&dst
); break;
494 case _T('*'): find
= strcmpwild_astr(&src
,&dst
); --src
; break;
495 default : find
= (int) (*src
== *dst
); ++dst
;
498 while(*src
== _T('*') && find
== 1) ++src
;
499 return (int) (find
== 1 && *dst
== 0 && *src
== 0);
502 //<summary>Compare two strings, with globbing, and character sets.</summary>
503 //<param name="lhs">String or expression for left-hand side of comparison.</param>
504 //<param name="rhs">String for right-hand side of comparison.</param>
506 // Returns 1 if src does not match dst, or -1 if either src or dst are null,
507 // or 0 if src matches dst.
510 // Simple regular expressions are permitted in 'src': The character '*' matches
511 // zero or more characters up to the next pattern, or the end of the string. The
512 // '?' character matches any single character. Character sets and negation are
513 // also permitted, for example, '[abcd]', '[a-zA-Z]', etc.
515 inline int strcmpwild(const TCHAR
* src
,const TCHAR
* dst
)
517 if(!src
|| !dst
) return -1;
518 return (strcmpwild_impl(src
,dst
)==1)?0:1;
522 //<summary>Allocate & init an xml_attribute_struct structure.</summary>
523 //<returns>Pointer to new xml_attribute_struct structure.</returns>
524 inline static xml_attribute_struct
* new_attribute(void)
526 xml_attribute_struct
* p
= (xml_attribute_struct
*)malloc(sizeof(xml_attribute_struct
)); //Allocate one attribute.
527 if(p
) //If allocation succeeded.
529 p
->name
= p
->value
= 0; //No name or value.
531 p
->name_size
= p
->value_size
= 0; //Lengths of zero.
533 p
->name_insitu
= p
->value_insitu
= true; //Default to being in-situ of the parse string.
539 //<summary>Allocate & init an xml_node_struct structure.</summary>
540 //<param name="type">Desired node type.</param>
541 //<returns>Pointer to new xml_node_struct structure.</returns>
542 inline static xml_node_struct
* new_node(xml_node_type type
= node_element
)
544 xml_node_struct
* p
= (xml_node_struct
*)malloc(sizeof(xml_node_struct
)); //Allocate one node.
545 if(p
) //If allocation succeeded.
547 p
->name
= p
->value
= 0; //No name or data.
549 p
->name_size
= p
->value_size
= 0;
551 p
->type
= type
; //Set the desired type.
552 p
->attributes
= p
->children
= 0; //No attributes or children.
553 p
->name_insitu
= p
->value_insitu
= true; //Default to being in-situ of the parse string.
556 type
!= node_document
&& //None of these will have attributes.
557 type
!= node_pcdata
&&
558 type
!= node_cdata
&&
559 type
!= node_include
&&
562 p
->attribute
= (xml_attribute_struct
**)malloc(sizeof(xml_attribute_struct
*)); //Allocate one attribute.
563 else p
->attribute
= NULL
;
564 p
->attribute_space
= (p
->attribute
) ? 1 : 0;
567 type
== node_element
|| //Only these will have children.
568 type
== node_doctype
||
569 type
== node_document
571 p
->child
= (xml_node_struct
**)malloc(sizeof(xml_node_struct
*)); //Allocate one child.
572 else p
->child
= NULL
;
573 p
->child_space
= (p
->child
) ? 1 : 0;
579 //<summary>Allocate & append a new xml_node_struct onto the given parent.</summary>
580 //<param name="parent">Pointer to parent node.</param>
581 //<param name="grow">Pointer space growth increment.</param>
582 //<param name="type">Desired node type.</param>
583 //<returns>Pointer to new node.</returns>
584 //<remarks>Child pointer space of 'node' may be reallocated.</remarks>
585 inline static xml_node_struct
* append_node(xml_node_struct
* parent
,long grow
,xml_node_type type
= node_element
)
587 if(!parent
) return NULL
; //Must have a parent.
588 if(parent
->children
== parent
->child_space
) //Out of pointer space.
590 xml_node_struct
** t
= (xml_node_struct
**)realloc(parent
->child
,sizeof(xml_node_struct
*)*(parent
->child_space
+grow
)); //Grow pointer space.
591 if(t
) //Reallocation succeeded.
594 parent
->child_space
+= grow
; //Update the available space.
597 xml_node_struct
* child
= new_node(type
); //Allocate a new child.
598 child
->parent
= parent
; //Set it's parent pointer.
599 parent
->child
[parent
->children
] = child
; //Set the parent's child pointer.
600 parent
->children
++; //One more child.
605 //<summary>Allocate & append a new attribute to the given xml_node_struct.</summary>
606 //<param name="node">Pointer to parent node.</param>
607 //<param name="grow">Pointer space growth increment.</param>
608 //<returns>Pointer to appended xml_attribute_struct.</returns>
609 //<remarks>Attribute pointer space of 'node' may be reallocated.</remarks>
610 inline static xml_attribute_struct
* append_attribute(xml_node_struct
* node
,long grow
)
612 if(!node
) return NULL
;
613 xml_attribute_struct
* a
= new_attribute();
615 if(node
->attributes
== node
->attribute_space
) //Out of space, so grow.
617 xml_attribute_struct
** t
= (xml_attribute_struct
**)realloc(node
->attribute
,sizeof(xml_node_struct
*)*(node
->attribute_space
+grow
));
621 node
->attribute_space
+= grow
;
624 node
->attribute
[node
->attributes
] = a
;
630 //<summary>Non-recursively free a tree.</summary>
631 //<param name="root">
632 // Pointer to the root of the tree. Note: 'root' must have been dynamically
633 // allocated using 'malloc' or 'realloc', as 'free_node' tries to also free
634 // the structure pointed to by 'root'.
636 //<remarks>'root' no longer points to a valid structure.</remarks>
637 inline static void free_node(xml_node_struct
* node
)
641 register xml_node_struct
* cursor
= node
;
643 //Free all children of children.
647 for(; cursor
->children
>0; --cursor
->children
) //Free each child in turn; 'children' keeps count while we jump around.
649 register xml_node_struct
* t
= cursor
->child
[cursor
->children
-1]; //Take a pointer to the child.
650 if(t
&& t
->children
) //If the child has children.
652 cursor
= t
; //Step in.
653 goto LOC_STEP_INTO
; //Step into this node.
657 if(t
->attributes
) //Child has attributes.
659 register unsigned int n
= t
->attributes
; //Free each attribute.
660 for(register unsigned int i
=0; i
<n
; ++i
)
662 if(t
->attribute
[i
]->name
&& !t
->attribute
[i
]->name_insitu
)
663 free(t
->attribute
[i
]->name
);
664 if(t
->attribute
[i
]->value
&& !t
->attribute
[i
]->value_insitu
)
665 free(t
->attribute
[i
]->value
);
666 free(t
->attribute
[i
]);
669 if(t
->attribute
) free(t
->attribute
); //Free attribute pointer space.
670 if(t
->child
) free(t
->child
); //Free child pointer space.
671 if(t
->name
&& !t
->name_insitu
) free(t
->name
);
672 if(t
->value
&& !t
->value_insitu
) free(t
->value
);
673 free(t
); //Free the child node.
676 cursor
= cursor
->parent
; //Step out.
678 while(cursor
->children
); //While there are children.
679 //Finally, free the root's children & the root itself.
680 if(cursor
->attributes
)
682 register unsigned int n
= cursor
->attributes
;
683 for(register unsigned int i
=0; i
<n
; ++i
)
685 if(cursor
->attribute
[i
]->name
&& !cursor
->attribute
[i
]->name_insitu
)
686 free(cursor
->attribute
[i
]->name
);
687 if(cursor
->attribute
[i
]->value
&& !cursor
->attribute
[i
]->value_insitu
)
688 free(cursor
->attribute
[i
]->value
);
689 free(cursor
->attribute
[i
]);
692 if(cursor
->attribute
) free(cursor
->attribute
); //Free attribute pointer space.
693 if(cursor
->child
) free(cursor
->child
); //Free child pointer space.
694 if(cursor
->name
&& !cursor
->name_insitu
) free(cursor
->name
); //Free name & data.
695 if(cursor
->value
&& !cursor
->value_insitu
) free(cursor
->value
);
696 free(cursor
); //Free the root itself.
699 //<summary>Recursively free a tree.</summary>
700 //<param name="root">Pointer to the root of the tree.</param>
701 //<remarks>Not used.</remarks>
702 inline static void free_node_recursive(xml_node_struct
* root
)
706 unsigned int n
= root
->attributes
;
707 register unsigned int i
;
710 if(root
->attribute
[i
]->name
&& !root
->attribute
[i
]->name_insitu
)
711 free(root
->attribute
[i
]->name
);
712 if(root
->attribute
[i
]->value
&& !root
->attribute
[i
]->value_insitu
)
713 free(root
->attribute
[i
]->value
);
714 free(root
->attribute
[i
]);
716 free(root
->attribute
);
719 free_node_recursive(root
->child
[i
]);
721 if(root
->name
&& !root
->name_insitu
) free(root
->name
);
722 if(root
->value
&& !root
->value_insitu
) free(root
->value
);
728 //<summary>Parser utilities.</summary>
729 #define SKIPWS() { while(chartype_space(*s)) ++s; if(*s==0) return s; }
730 #define OPTSET(OPT) ( optmsk & OPT )
731 #define PUSHNODE(TYPE) { cursor = append_node(cursor,growby,TYPE); }
732 #define POPNODE() { cursor = cursor->parent; }
733 #define SCANFOR(X) { while(*s!=0 && !(X)) ++s; if(*s==0) return s; }
734 #define SCANWHILE(X) { while((X)) ++s; if(*s==0) return s; }
735 #ifndef PUGOPT_NONSEG
736 # define ENDSEG() { ch = *s; *s = 0; ++s; if(*s==0) return s; }
738 # define ENDSEG() { ch = *s; ++s; if(*s==0) return s; }
739 # define SETLEN() ( cursor->value_size = s - cursor->value )
740 # define ENDSEGDAT() { ch = *s; SETLEN(); ++s; if(*s==0) return s; }
741 # define ENDSEGNAM(S) { ch = *s; S->name_size = s - S->name; ++s; if(*s==0) return s; }
742 # define ENDSEGATT(S) { ch = *s; S->value_size = s - S->value; ++s; if(*s==0) return s; }
746 //<summary>Static single-pass in-situ parse the given xml string.</summary>
747 //<param name="s">Pointer to XML-formatted string.</param>
748 //<param name="root">Pointer to root.</param>
749 //<param name="grow">Pointer space growth increment.</param>
750 //<param name="optmsk">Parse options mask.</param>
751 //<returns>Last string position or null.</returns>
753 // Input string is zero-segmented if 'PUGOPT_NONSEG' is not defined. Memory
754 // may have been allocated to 'root' (free with 'free_node').
756 static TCHAR
* parse(register TCHAR
* s
,xml_node_struct
* xmldoc
,long growby
,unsigned long optmsk
= parse_default
)
758 if(!s
|| !xmldoc
) return s
;
759 TCHAR ch
= 0; //Current char, in cases where we must null-terminate before we test.
760 xml_node_struct
* cursor
= xmldoc
; //Tree node cursor.
761 TCHAR
* mark
= s
; //Marked string position for temporary look-ahead.
764 LOC_SEARCH
: //Obliviously search for next element.
765 SCANFOR(chartype_enter(*s
)); //Find the next '<'.
766 if(chartype_enter(*s
))
769 LOC_CLASSIFY
: //What kind of element?
770 if(chartype_pi(*s
)) //'<?...'
773 if(chartype_symbol(*s
) && OPTSET(parse_pi
))
776 SCANFOR(chartype_pi(*s
)); //Look for terminating '?'.
777 #ifndef PUGOPT_NONSEG
778 if(chartype_pi(*s
)) *s
= _T('/'); //Same semantics as for '<.../>', so fudge it.
781 PUSHNODE(node_pi
); //Append a new node on the tree.
782 goto LOC_ELEMENT
; //Go read the element name.
784 else //Bad PI or parse_pi not set.
786 SCANFOR(chartype_leave(*s
)); //Look for '>'.
792 else if(chartype_special(*s
)) //'<!...'
795 if(chartype_dash(*s
)) //'<!-...'
798 if(OPTSET(parse_comments
) && chartype_dash(*s
)) //'<!--...'
801 PUSHNODE(node_comment
); //Append a new node on the tree.
802 cursor
->value
= s
; //Save the offset.
803 while(*s
!=0 && *(s
+1) && *(s
+2) && !((chartype_dash(*s
) && chartype_dash(*(s
+1))) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating '-->'.
806 SETLEN(); //NF 19 Jan 2003.
808 *s
= 0; //Zero-terminate this segment at the first terminating '-'.
810 if(OPTSET(parse_trim_comment
)) //Trim whitespace.
813 strwtrim(&cursor
->value
,cursor
->value_size
);
815 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
816 else strwtrim(&cursor
->value
);
819 s
+= 2; //Step over the '\0-'.
820 POPNODE(); //Pop since this is a standalone.
821 goto LOC_LEAVE
; //Look for any following PCDATA.
825 while(*s
!=0 && *(s
+1)!=0 && *(s
+2)!=0 && !((chartype_dash(*s
) && chartype_dash(*(s
+1))) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating '-->'.
828 goto LOC_LEAVE
; //Look for any following PCDATA.
831 else if(chartype_lbracket(*s
)) //'<![...'
834 if(*s
==_T('I')) //'<![I...'
837 if(*s
==_T('N')) //'<![IN...'
840 if(*s
==_T('C')) //'<![INC...'
843 if(*s
==_T('L')) //'<![INCL...'
846 if(*s
==_T('U')) //'<![INCLU...'
849 if(*s
==_T('D')) //'<![INCLUD...'
852 if(*s
==_T('E')) //'<![INCLUDE...'
855 if(chartype_lbracket(*s
)) //'<![INCLUDE[...'
858 if(OPTSET(node_cdata
))
860 PUSHNODE(node_include
); //Append a new node on the tree.
861 cursor
->value
= s
; //Save the offset.
862 while(!(chartype_rbracket(*s
) && chartype_rbracket(*(s
+1)) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating ']]>'.
863 if(chartype_rbracket(*s
))
866 SETLEN(); //NF 19 Jan 2003.
868 *s
= 0; //Zero-terminate this segment.
871 if(OPTSET(parse_trim_cdata
)) //Trim whitespace.
874 strwtrim(&cursor
->value
, cursor
->value_size
);
876 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
877 else strwtrim(&cursor
->value
);
881 POPNODE(); //Pop since this is a standalone.
883 else //Flagged for discard, but we still have to scan for the terminator.
885 while(*s
!=0 && *(s
+1)!=0 && *(s
+2)!=0 && !(chartype_rbracket(*s
) && chartype_rbracket(*(s
+1)) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating ']]>'.
888 ++s
; //Step over the last ']'.
889 goto LOC_LEAVE
; //Look for any following PCDATA.
898 else if(*s
==_T('C')) //'<![C...'
901 if(*s
==_T('D')) //'<![CD...'
904 if(*s
==_T('A')) //'<![CDA...'
907 if(*s
==_T('T')) //'<![CDAT...'
910 if(*s
==_T('A')) //'<![CDATA...'
913 if(chartype_lbracket(*s
)) //'<![CDATA[...'
916 if(OPTSET(parse_cdata
))
918 PUSHNODE(node_cdata
); //Append a new node on the tree.
919 cursor
->value
= s
; //Save the offset.
920 while(*s
!=0 && *(s
+1)!=0 && *(s
+2)!=0 && !(chartype_rbracket(*s
) && chartype_rbracket(*(s
+1)) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating ']]>'.
921 if(*(s
+2)==0) return s
; //Very badly formed.
922 if(chartype_rbracket(*s
))
925 SETLEN(); //NF 19 Jan 2003.
927 *s
= 0; //Zero-terminate this segment.
930 if(OPTSET(parse_trim_cdata
)) //Trim whitespace.
933 strwtrim(&cursor
->value
,cursor
->value_size
);
935 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
936 else strwtrim(&cursor
->value
);
940 POPNODE(); //Pop since this is a standalone.
942 else //Flagged for discard, but we still have to scan for the terminator.
944 while(*s
!=0 && *(s
+1)!=0 && *(s
+2)!=0 && !(chartype_rbracket(*s
) && chartype_rbracket(*(s
+1)) && chartype_leave(*(s
+2)))) ++s
; //Scan for terminating ']]>'.
947 ++s
; //Step over the last ']'.
948 goto LOC_LEAVE
; //Look for any following PCDATA.
955 continue; //Probably a corrupted CDATA section, so just eat it.
957 else if(*s
==_T('D')) //'<!D...'
960 if(*s
==_T('O')) //'<!DO...'
963 if(*s
==_T('C')) //'<!DOC...'
966 if(*s
==_T('T')) //'<!DOCT...'
969 if(*s
==_T('Y')) //'<!DOCTY...'
972 if(*s
==_T('P')) //'<!DOCTYP...'
975 if(*s
==_T('E')) //'<!DOCTYPE...'
978 SKIPWS(); //Eat any whitespace.
979 xml_attribute_struct
* a
= 0;
980 if(OPTSET(parse_doctype
))
982 PUSHNODE(node_doctype
); //Append a new node on the tree.
983 a
= append_attribute(cursor
,3); //Store the DOCTYPE name.
984 a
->value
= a
->name
= s
; //Save the offset.
986 SCANWHILE(chartype_symbol(*s
)); //'<!DOCTYPE symbol...'
988 if(OPTSET(parse_doctype
))
989 a
->name_size
= a
->value_size
= s
- a
->value
; //Save the length. rem: Before ENDSEG()
991 ENDSEG(); //Save char in 'ch', terminate & step over.
992 if(chartype_space(ch
)) SKIPWS(); //Eat any whitespace.
994 if(chartype_symbol(*s
))
997 SCANWHILE(chartype_symbol(*s
)); //'...symbol SYSTEM...'
998 if(OPTSET(parse_doctype
))
1000 a
= append_attribute(cursor
,1);
1001 a
->value
= a
->name
= mark
;
1002 #ifdef PUGOPT_NONSEG
1003 a
->value_size
= a
->name_size
= s
- mark
; //NF 19 Jan 2003.
1011 if(chartype_quote(*s
)) //'...SYSTEM "..."'
1017 while(*s
!=0 && *s
!= ch
) ++s
;
1020 if(OPTSET(parse_doctype
))
1022 a
= append_attribute(cursor
,1);
1024 #ifdef PUGOPT_NONSEG
1025 a
->value_size
= s
- mark
; //NF 19 Jan 2003.
1031 SKIPWS(); //Eat whitespace.
1032 if(chartype_quote(*s
)) goto LOC_DOCTYPE_QUOTE
; //Another quoted section to store.
1033 else if(chartype_symbol(*s
)) goto LOC_DOCTYPE_SYMBOL
; //Not wellformed, but just parse it.
1036 if(chartype_lbracket(*s
)) //'...[...'
1038 ++s
; //Step over the bracket.
1039 if(OPTSET(parse_doctype
)) cursor
->value
= s
; //Store the offset.
1040 unsigned int bd
= 1; //Bracket depth counter.
1041 while(*s
!=0) //Loop till we're out of all brackets.
1043 if(chartype_rbracket(*s
)) --bd
;
1044 else if(chartype_lbracket(*s
)) ++bd
;
1048 //Note: 's' now points to end of DTD, i.e.: ']'.
1049 if(OPTSET(parse_doctype
))
1051 //Note: If we aren't parsing the DTD ('!parse_dtd', etc.) then it is stored in the DOM as one whole chunk.
1052 #ifdef PUGOPT_NONSEG
1053 SETLEN(); //NF 19 Jan 2003
1055 *s
= 0; //Zero-terminate.
1057 if(OPTSET(parse_dtd
)||OPTSET(parse_dtd_only
))
1059 if(OPTSET(parse_dtd
))
1061 #ifdef PUGOPT_NONSEG
1065 *s
= 0; //Zero-terminate.
1066 parse(cursor
->value
,cursor
,growby
,optmsk
); //Parse it.
1068 catch(...){ assert(false); }
1071 parse(cursor
->value
,cursor
,growby
,optmsk
); //Parse it.
1074 if(OPTSET(parse_dtd_only
)) return (s
+1); //Flagged to parse DTD only, so leave here.
1076 else if(OPTSET(parse_trim_doctype
)) //Trim whitespace.
1078 #ifdef PUGOPT_NONSEG
1079 strwtrim(&cursor
->value
, cursor
->value_size
);
1081 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
1082 else strwtrim(&cursor
->value
);
1085 ++s
; //Step over the zero.
1086 POPNODE(); //Pop since this is a standalone.
1088 SCANFOR(chartype_leave(*s
));
1091 //Fall-through; make sure we pop.
1092 POPNODE(); //Pop since this is a standalone.
1101 else if(chartype_symbol(*s
)) //An inline DTD tag.
1104 SCANWHILE(chartype_symbol(*s
));
1105 ENDSEG(); //Save char in 'ch', terminate & step over.
1106 xml_node_type e
= node_dtd_entity
;
1107 #ifdef PUGOPT_NONSEG
1108 const unsigned int dtdilen
= (s
- 1) - mark
;
1109 if(_tcsncmp(mark
,_T("ATTLIST"),max((7*sizeof(TCHAR
)),dtdilen
))==0) e
= node_dtd_attlist
;
1110 else if(_tcsncmp(mark
,_T("ELEMENT"),max((7*sizeof(TCHAR
)),dtdilen
))==0) e
= node_dtd_element
;
1111 else if(_tcsncmp(mark
,_T("NOTATION"),max((8*sizeof(TCHAR
)),dtdilen
))==0) e
= node_dtd_notation
;
1113 if(_tcscmp(mark
,_T("ATTLIST"))==0) e
= node_dtd_attlist
;
1114 else if(_tcscmp(mark
,_T("ELEMENT"))==0) e
= node_dtd_element
;
1115 else if(_tcscmp(mark
,_T("NOTATION"))==0) e
= node_dtd_notation
;
1117 PUSHNODE(e
); //Append a new node on the tree.
1118 if(*s
!=0 && chartype_space(ch
))
1120 SKIPWS(); //Eat whitespace.
1121 if(chartype_symbol(*s
) || *s
==_T('%'))
1124 if(*s
==_T('%')) //Could be '<!ENTITY % name' -or- '<!ENTITY %name'
1126 #ifdef PUGOPT_NONSEG
1127 //Note: For memory-mapped file support we need to treat 's' as read-only so we can't do '*(s-1) = _T('%');' below.
1128 cursor
->name
= mark
; //Sort out extraneous whitespace when we retrieve it. TODO: Whitespace cleanup.
1131 if(chartype_space(*s
))
1133 SKIPWS(); //Eat whitespace.
1134 #ifndef PUGOPT_NONSEG
1136 cursor
->name
= (s
-1);
1139 #ifndef PUGOPT_NONSEG
1140 else cursor
->name
= mark
;
1143 else cursor
->name
= s
;
1144 SCANWHILE(chartype_symbol(*s
));
1145 #ifdef PUGOPT_NONSEG
1146 cursor
->name_size
= s
- cursor
->name
;
1148 ENDSEG(); //Save char in 'ch', terminate & step over.
1149 if(chartype_space(ch
))
1151 SKIPWS(); //Eat whitespace.
1152 if(e
== node_dtd_entity
) //Special case; may have multiple quoted sections w/anything inside.
1154 cursor
->value
= s
; //Just store everything here.
1155 bool qq
= false; //Quote in/out flag.
1156 while(*s
!= 0) //Loop till we find the right sequence.
1158 if(!qq
&& chartype_quote(*s
)){ ch
= *s
; qq
= true; }
1159 else if(qq
&& *s
== ch
) qq
= false;
1160 else if(!qq
&& chartype_leave(*s
)) //Not in quoted reqion and '>' hit.
1162 #ifdef PUGOPT_NONSEG
1163 SETLEN(); //NF 19 Jan 2003.
1168 if(OPTSET(parse_trim_entity
))
1170 #ifdef PUGOPT_NONSEG
1171 strwtrim(&cursor
->value
,cursor
->value_size
);
1173 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
1174 else strwtrim(&cursor
->value
);
1182 if(OPTSET(parse_trim_entity
))
1184 #ifdef PUGOPT_NONSEG
1185 strwtrim(&cursor
->value
, cursor
->value_size
);
1187 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
1188 else strwtrim(&cursor
->value
);
1195 SCANFOR(chartype_leave(*s
)); //Just look for '>'.
1196 #ifdef PUGOPT_NONSEG
1197 SETLEN(); //NF 19 Jan 2003.
1202 if(OPTSET(parse_trim_entity
))
1204 #ifdef PUGOPT_NONSEG
1205 strwtrim(&cursor
->value
, cursor
->value_size
);
1207 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
1208 else strwtrim(&cursor
->value
);
1220 else if(chartype_symbol(*s
)) //'<#...'
1222 cursor
= append_node(cursor
,growby
); //Append a new node to the tree.
1223 LOC_ELEMENT
: //Scan for & store element name.
1225 SCANWHILE(chartype_symbol(*s
)); //Scan for a terminator.
1226 #ifdef PUGOPT_NONSEG
1227 cursor
->name_size
= s
- cursor
->name
; //Note: Before ENDSEG().
1229 ENDSEG(); //Save char in 'ch', terminate & step over.
1234 chartype_close(ch
) //'</...'
1235 #ifdef PUGOPT_NONSEG
1237 //chartype_pi(ch) //Treat '?>' as '/>' NF 19 Jan 2003
1242 SCANFOR(chartype_leave(*s
)); //Scan for '>', stepping over the tag name.
1246 else if(*s
!=0 && !chartype_space(ch
)) goto LOC_PCDATA
; //No attributes, so scan for PCDATA.
1247 else if(*s
!=0 && chartype_space(ch
))
1249 SKIPWS(); //Eat any whitespace.
1251 if(chartype_symbol(*s
)) //<... #...
1253 xml_attribute_struct
* a
= append_attribute(cursor
,growby
); //Make space for this attribute.
1254 a
->name
= s
; //Save the offset.
1255 SCANWHILE(chartype_symbol(*s
)); //Scan for a terminator.
1256 #ifdef PUGOPT_NONSEG
1259 ENDSEG(); //Save char in 'ch', terminate & step over.
1261 if(*s
!=0 && chartype_space(ch
)) SKIPWS(); //Eat any whitespace.
1262 if(*s
!=0 && (chartype_equals(ch
) || chartype_equals(*s
))) //'<... #=...'
1264 if(chartype_equals(*s
)) ++s
;
1265 SKIPWS(); //Eat any whitespace.
1266 if(chartype_quote(*s
)) //'<... #="...'
1268 ch
= *s
; //Save quote char to avoid breaking on "''" -or- '""'.
1269 ++s
; //Step over the quote.
1270 a
->value
= s
; //Save the offset.
1271 SCANFOR(*s
== ch
); //Scan for the terminating quote, or '>'.
1272 #ifdef PUGOPT_NONSEG
1275 ENDSEG(); //Save char in 'ch', terminate & step over.
1277 if(OPTSET(parse_trim_attribute
)) //Trim whitespace.
1279 #ifdef PUGOPT_NONSEG
1280 strwtrim(&a
->value
,a
->value_size
);
1282 if(OPTSET(parse_wnorm
)) strwnorm(&a
->value
);
1283 else strwtrim(&a
->value
);
1286 if(chartype_leave(*s
)){ ++s
; goto LOC_PCDATA
; }
1287 else if(chartype_close(*s
))
1291 SKIPWS(); //Eat any whitespace.
1292 if(chartype_leave(*s
)) ++s
;
1295 if(chartype_space(*s
)) //This may indicate a following attribute.
1297 SKIPWS(); //Eat any whitespace.
1298 goto LOC_ATTRIBUTE
; //Go scan for additional attributes.
1302 if(chartype_symbol(*s
)) goto LOC_ATTRIBUTE
;
1303 else if(*s
!=0 && cursor
->type
== node_pi
)
1305 #ifdef PUGOPT_NONSEG
1306 SCANFOR(chartype_pi(*s
)); //compliments change where we don't fudge to '/>' when we find the PI. NF 20 Jan 2003
1307 SKIPWS(); //Eat any whitespace.
1308 if(chartype_pi(*s
)) ++s
;
1310 SCANFOR(chartype_close(*s
));
1311 SKIPWS(); //Eat any whitespace.
1312 if(chartype_close(*s
)) ++s
;
1314 SKIPWS(); //Eat any whitespace.
1315 if(chartype_leave(*s
)) ++s
;
1322 if(chartype_leave(*s
)) //'...>'
1324 ++s
; //Step over the '>'.
1325 LOC_PCDATA
: //'>...<'
1326 mark
= s
; //Save this offset while searching for a terminator.
1327 SKIPWS(); //Eat whitespace if no genuine PCDATA here.
1328 if(chartype_enter(*s
)) //We hit a '<...', with only whitespace, so don't bother storing anything.
1330 if(chartype_close(*(s
+1))) //'</...'
1332 SCANFOR(chartype_leave(*s
)); //Scan for '>', stepping over any end-tag name.
1334 continue; //Continue scanning.
1336 else goto LOC_SEARCH
; //Expect a new element enter, so go scan for it.
1338 s
= mark
; //We hit something other than whitespace; restore the original offset.
1339 PUSHNODE(node_pcdata
); //Append a new node on the tree.
1340 cursor
->value
= s
; //Save the offset.
1341 SCANFOR(chartype_enter(*s
)); //'...<'
1342 #ifdef PUGOPT_NONSEG
1345 ENDSEG(); //Save char in 'ch', terminate & step over.
1347 if(OPTSET(parse_trim_pcdata
)) //Trim whitespace.
1349 #ifdef PUGOPT_NONSEG
1350 strwtrim(&cursor
->value
,cursor
->value_size
);
1352 if(OPTSET(parse_wnorm
)) strwnorm(&cursor
->value
);
1353 else strwtrim(&cursor
->value
);
1356 POPNODE(); //Pop since this is a standalone.
1357 if(chartype_enter(ch
)) //Did we hit a '<...'?
1359 if(chartype_close(*s
)) //'</...'
1361 SCANFOR(chartype_leave(*s
)); //'...>'
1365 else if(chartype_special(*s
)) goto LOC_CLASSIFY
; //We hit a '<!...'. We must test this here if we want comments intermixed w/PCDATA.
1366 else if(*s
) goto LOC_CLASSIFY
;
1371 else if(chartype_close(*s
)) //'.../'
1374 if(chartype_leave(*s
)) //'.../>'
1383 else if(chartype_close(*s
)) //'.../'
1385 SCANFOR(chartype_leave(*s
)); //'.../>'
1396 //<summary>Read data from the file at 'path' into the buffer. Free with 'free'.</summary>
1397 //<param name="path">File path.</param>
1398 //<param name="buffer">Pointer to pointer to string to recieve buffer.</param>
1399 //<param name="size">Pointer to count bytes read and stored in 'buffer'.</param>
1400 //<param name="tempsize">Temporary read buffer size.</param>
1401 //<returns>Success if file at 'path' was opened and bytes were read into memory.</returns>
1402 //<remarks>Memory is allocated at '*buffer'. Free with 'free'.</remarks>
1403 inline static bool load_file(const TCHAR* path,TCHAR** buffer,unsigned long* size,unsigned long tempsize = 4096)
1405 if(!path || !buffer || !size) return false;
1408 HANDLE file_handle = CreateFile(path,GENERIC_READ,FILE_SHARE_READ,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
1409 if(file_handle == INVALID_HANDLE_VALUE) return false;
1410 TCHAR* temp = (TCHAR*) malloc(sizeof(TCHAR)*tempsize);
1411 if(!temp) return false;
1412 unsigned long read_bytes = 0;
1413 ZeroMemory(temp,sizeof(TCHAR)*tempsize);
1414 while(ReadFile(file_handle,(void*)temp,tempsize-1,&read_bytes,0) && read_bytes && strcatgrow(buffer,temp))
1416 *size += read_bytes;
1417 ZeroMemory(temp,sizeof(TCHAR)*tempsize);
1419 CloseHandle(file_handle);
1421 return (*size) ? true : false;
//<summary>A void pointer array. Used by various xml_node::find* functions.</summary>
//<remarks>
// Stores raw 'void*' values only; the pointed-to objects are never freed by this class.
// Storage grows by the increment given to the constructor whenever it is full.
//</remarks>
class pointer_array
{
//Internal Data Members
protected:
	unsigned int _size; //Count items.
	unsigned int _room; //Available space.
	void** _data; //The list.
	unsigned int _grow; //Grow by increment.
//Construction/Destruction
public:
	//<summary>Default constructor.</summary>
	//<param name="grow">Array growth increment. A value of 0 is treated as 1.</param>
	pointer_array(unsigned int grow = 4):
		_size(0),
		_room(0),
		_data(NULL),
		_grow(grow ? grow : 1) //A zero increment could never make room for an element.
	{
		_data = (void**)malloc(sizeof(void*)*_grow);
		_room = (_data) ? _grow : 0;
	}
	//<summary>Copy constructor. Duplicates the pointer list (not the pointed-to objects).</summary>
	//<param name="r">Array to copy.</param>
	pointer_array(const pointer_array& r):
		_size(0),
		_room(0),
		_data(NULL),
		_grow(r._grow)
	{
		const unsigned int room = (r._data && r._room) ? r._room : _grow;
		_data = (void**)malloc(sizeof(void*)*room);
		if(_data)
		{
			_room = room;
			if(r._data)
			{
				for(unsigned int i=0; i<r._size; ++i) _data[i] = r._data[i];
				_size = r._size;
			}
		}
	}
	//<summary>Assignment. Leaves this array untouched if the copy cannot be allocated.</summary>
	//<param name="r">Array to copy.</param>
	//<returns>Reference to this array.</returns>
	pointer_array& operator=(const pointer_array& r)
	{
		if(this != &r)
		{
			pointer_array temp(r);
			if(temp._data || !r._data) //Copy succeeded (or there was nothing to copy).
			{
				void** old = _data; //Hand our old storage to 'temp' so its destructor frees it.
				_data = temp._data;
				_size = temp._size;
				_room = temp._room;
				_grow = temp._grow;
				temp._data = old;
			}
		}
		return *this;
	}
	virtual ~pointer_array(){ if(_data) free(_data); }
public:
	bool empty() const { return (_size == 0); } //True if there is no data in the array.
	void remove_all(){ _size = 0; } //Remove all data elements from the array.
	void clear() //Free any allocated memory.
	{
		if(_data)
		{
			if(_room != _grow)
			{
				void** temp = (void**)realloc(_data,sizeof(void*)*_grow); //Reallocate to first growth increment.
				if(temp) //On failure the old (larger) block is still valid, so keep it and its size.
				{
					_data = temp;
					_room = _grow; //Mark it as such.
				}
			}
			_size = 0; //Mark array as empty.
		}
	}
	virtual void*& operator[](unsigned int i) //Access element at subscript, or dummy value if overflow.
	{
		static void* dummy = 0;
		if(i < _size) return _data[i];
		dummy = 0; //Forget anything a previous out-of-range write stored in the dummy.
		return dummy;
	}
	unsigned int size() const { return _size; } //Count data elements in the array.
	virtual void* at(unsigned int i){ if(i < _size) return _data[i]; else return NULL; } //Access element at subscript, or NULL if overflow.
	long push_back(void* element) //Append a new element to the array.
	{
		if(!_data) return -1; //Fail if no array.
		if(_size >= _room) //Not enough room.
		{
			void** temp = (void**)realloc(_data,sizeof(void*)*(_room+_grow)); //Grow the array.
			if(!temp) return -1; //Reallocation failed; the existing contents are unchanged.
			_data = temp; //Assign reallocated value to array pointer.
			_room += _grow; //Increment available space.
		}
		_data[_size] = element; //Set the element to be added.
		_size++; //Increment our count of elements.
		return _size-1; //Return the element's subscript.
	}
};
1495 //<summary>A simple indentation stack.</summary>
1496 //<remarks>Used by xml_node::outer_xml function.</remarks>
1499 //Internal Data Members
1501 TCHAR _inch
; //The indent character.
1502 TCHAR
* _stac
; //The aggregate indent string (stack).
1503 int _size
; //Current depth (avoids using '_tcslen' on push/pop).
1504 //Construction/Destruction
1506 //<summary>Default constructor.</summary>
1507 //<param name="c">Indent character.</param>
1508 indent_stack(TCHAR c
= _T('\t')):
1513 _stac
= (TCHAR
*)malloc(sizeof(TCHAR
)); //Allocate.
1514 *_stac
= 0; //Zero-terminate.
1517 virtual ~indent_stack(){ if(_stac
) free(_stac
); }
1520 //<summary>Grow indent string by one indent character.</summary>
1521 //<remarks>Reallocates the indent string.</remarks>
1527 _stac
= (TCHAR
*)realloc(_stac
,sizeof(TCHAR
)*(_size
+1));
1528 _stac
[_size
-1] = _inch
;
1532 //<summary>Shrink the indent string by one indent character.</summary>
1535 if(_inch
&& _stac
&& _size
> 0)
1538 _stac
= (TCHAR
*)realloc(_stac
,sizeof(TCHAR
)*(_size
+1));
1542 //<summary>Accesses the indent depth.</summary>
1543 //<returns>The current indent string, or "" if empty.</returns>
1544 const TCHAR
* depth(){ return (_inch
&& _stac
) ? _stac
: _T(""); }
//<summary>Serializes 'node' and, recursively, its children to 'os' as XML text.</summary>
//<remarks>
// Nothing is written unless 'node' is non-null and 'os.good()'. The current indent string
// is written first; what follows depends on the node type (DTD declarations, DOCTYPE,
// CDATA/INCLUDE/comment sections, or an element/PI tag with its attributes).
// Under PUGOPT_NONSEG strings are emitted with os.write() and their explicit
// name_size/value_size lengths; otherwise they are streamed directly.
//</remarks>
1549 // Stream output. Recursively writes the given xml_node_struct structure to
1550 // the given stream. NOTE: Use this recursive implementation for debug purposes
1551 // only, since a large tree may cause a stack overflow.
1553 //<param name="os">Reference to output stream.</param>
1554 //<param name="indent">Reference to indentation stack.</param>
1555 //<param name="node">Pointer to the node.</param>
1556 //<param name="breaks">Use linebreaks?</param>
1558 // String data is written to stream. Indent stack may be altered.
1559 // If you want to make this prettier, and to avoid propagating whitespace,
1560 // you will have to trim excess whitespace from the PCDATA sections.
1562 inline static void outer_xml(std::basic_ostream
<TCHAR
,std::char_traits
<TCHAR
> > & os
,indent_stack
& indent
,xml_node_struct
* node
,bool breaks
= true)
1564 if(node
&& os
.good()) //There is a node and ostream is OK.
//NOTE(review): 'register' is no longer a valid storage class specifier as of C++17; plain locals would be equivalent.
1566 register unsigned int n
, i
;
1567 os
<< indent
.depth();
1570 case node_dtd_attlist
:
1573 #ifdef PUGOPT_NONSEG
1574 os
<< _T("<!ATTLIST ");
1575 os
.write( node
->name
, node
->name_size
);
1577 os
<< _T("<!ATTLIST ") << node
->name
;
1580 #ifdef PUGOPT_NONSEG
1583 os
.write( node
->value
, node
->value_size
);
1586 os
<< _T(" ") << node
->value
;
1592 case node_dtd_element
:
1595 #ifdef PUGOPT_NONSEG
1596 os
<< _T("<!ELEMENT ");
1597 os
.write( node
->name
, node
->name_size
);
1601 os
.write( node
->value
, node
->value_size
);
1604 os
<< _T("<!ELEMENT ") << node
->name
;
1605 if(node
->value
) os
<< _T(" ") << node
->value
;
1610 case node_dtd_entity
:
1613 #ifdef PUGOPT_NONSEG
1614 os
<< _T("<!ENTITY ");
1615 os
.write( node
->name
, node
->name_size
);
1619 os
.write( node
->value
, node
->value_size
);
1622 os
<< _T("<!ENTITY ") << node
->name
;
1623 if(node
->value
) os
<< _T(" ") << node
->value
;
1628 case node_dtd_notation
:
1631 #ifdef PUGOPT_NONSEG
1632 os
<< _T("<!NOTATION ");
1633 os
.write( node
->name
, node
->name_size
);
1637 os
.write( node
->value
, node
->value_size
);
1640 os
<< _T("<!NOTATION ") << node
->name
;
1641 if(node
->value
) os
<< _T(" ") << node
->value
;
1647 os
<< _T("<!DOCTYPE");
1648 n
= node
->attributes
;
//NOTE(review): attribute[i] is dereferenced here without a null check, unlike the element/PI
//attribute loop further down, which tests 'node->attribute[i] &&' first.
1652 if(node
->attribute
[i
]->name
)
1653 #ifdef PUGOPT_NONSEG
1654 os
.write( node
->attribute
[i
]->name
, node
->attribute
[i
]->name_size
);
1656 os
<< node
->attribute
[i
]->name
;
1658 else if(node
->attribute
[i
]->value
)
1659 #ifdef PUGOPT_NONSEG
1662 os
.write( node
->attribute
[i
]->value
, node
->attribute
[i
]->value_size
);
1666 os
<< _T("\"") << node
->attribute
[i
]->value
<< _T("\"");
1671 if(breaks
) os
<< std::endl
;
1674 if(breaks
) os
<< std::endl
;
//Only the DTD declaration children (attlist/element/entity/notation) are written, via
//recursive calls made between the indent push and pop.
1677 indent
.push(); //Push the indent stack.
1682 node
->child
[i
] && //There is a child at i.
1684 node
->child
[i
]->type
== node_dtd_attlist
|| //Skip all other types.
1685 node
->child
[i
]->type
== node_dtd_element
||
1686 node
->child
[i
]->type
== node_dtd_entity
||
1687 node
->child
[i
]->type
== node_dtd_notation
1690 outer_xml(os
,indent
,node
->child
[i
],breaks
);
1692 indent
.pop(); //Pop the indent stack.
1695 else if(node
->value
)
1696 #ifdef PUGOPT_NONSEG
1699 os
.write(node
->value
,node
->value_size
);
1703 os
<< _T(" [") << node
->value
<< _T("]");
1708 #ifdef PUGOPT_NONSEG
1709 if(node
->value
) os
.write(node
->value
,node
->value_size
);
1711 if(node
->value
) os
<< node
->value
;
1715 #ifdef PUGOPT_NONSEG
1718 os
<< _T("<![CDATA[");
1719 os
.write(node
->value
,node
->value_size
);
1723 if(node
->value
) os
<< _T("<![CDATA[") << node
->value
<< _T("]]>");
1727 #ifdef PUGOPT_NONSEG
1730 os
<< _T("<![INCLUDE[");
1731 os
.write(node
->value
, node
->value_size
);
1735 if(node
->value
) os
<< _T("<![INCLUDE[") << node
->value
<< _T("]]>");
1739 #ifdef PUGOPT_NONSEG
1743 os
.write(node
->value
, node
->value_size
);
1747 if(node
->value
) os
<< _T("<!--") << node
->value
<< _T("-->");
1753 if(node
->type
==node_pi
) os
<< _T("?");
1755 #ifdef PUGOPT_NONSEG
1756 os
.write(node
->name
,node
->name_size
);
1760 else os
<< _T("anonymous");
1761 n
= node
->attributes
;
1764 if(node
->attribute
[i
] && node
->attribute
[i
]->name
)
1766 #ifdef PUGOPT_NONSEG
1768 os
.write(node
->attribute
[i
]->name
,node
->attribute
[i
]->name_size
);
1769 if(node
->attribute
[i
]->value
)
1772 os
.write(node
->attribute
[i
]->value
,node
->attribute
[i
]->value_size
);
1776 os
<< _T(" ") << node
->attribute
[i
]->name
;
1777 if(node
->attribute
[i
]->value
) os
<< _T("=\"") << node
->attribute
[i
]->value
<< _T("\"");
1782 if(n
&& node
->type
== node_element
)
//NOTE(review): child[0]->type is read before the 'node->child[0] &&' null test in the next
//statement; if child[0] can ever be null it is dereferenced first.
1785 if(n
== 1 && node
->child
[0]->type
== node_pcdata
)
1787 if(node
->child
[0] && node
->child
[0]->value
)
1788 #ifdef PUGOPT_NONSEG
1789 os
.write(node
->child
[0]->value
,node
->child
[0]->value_size
);
1791 os
<< node
->child
[0]->value
;
1796 if(breaks
) os
<< std::endl
;
1798 for(i
=0; i
<n
; ++i
) pug::outer_xml(os
,indent
,node
->child
[i
],breaks
);
1800 os
<< indent
.depth();
1803 #ifdef PUGOPT_NONSEG
1805 os
.write(node
->name
, node
->name_size
);
1807 if(node
->name
) os
<< node
->name
;
//Short closing form: '?>' for a node_pi, '/>' for anything else that reaches this branch.
1813 if(node
->type
==node_pi
) os
<< _T("?>");
1814 else os
<< _T("/>");
1819 if(breaks
) os
<< std::endl
;
1825 //<summary>Abstract iterator class for interating over a node's members.</summary>
1826 //<remarks>Used as base class for 'xml_node_iterator' and 'xml_attribute_iterator'.</remarks>
1827 template <class _Ty
,class _Diff
,class _Pointer
,class _Reference
>
1828 class xml_iterator
: public std::_Ranit
<_Ty
,_Diff
,_Pointer
,_Reference
>
1831 xml_node_struct
* _vref
; //A pointer to the node over which to iterate.
1832 long _sscr
; //Current subscript of element.
1834 xml_iterator() : _vref(0), _sscr(-1) {} //Default constructor.
1835 xml_iterator(xml_node_struct
* vref
,long sscr
= 0) : _vref(vref
), _sscr(sscr
){ } //Initializing constructor.
1836 xml_iterator(const xml_iterator
& r
) : _vref(r
._vref
), _sscr(r
._sscr
){ } //Copy constructor.
1837 virtual ~xml_iterator(){} //Destructor.
1839 virtual bool good() = 0; //Internal validity of '_vref'.
1840 virtual bool oob() = 0; //Out of bounds check for '_sscr' with respect to '_vref'. Returns true if '_sscr' is O.O.B.
1842 virtual long subscript(){ return _sscr
; } //Get subscript value;
1843 virtual void subscript(long new_subscript
){ _sscr
= new_subscript
; } //Set subscript value;
1845 virtual xml_iterator
& operator=(const xml_iterator
& rhs
){ _vref
= rhs
._vref
; _sscr
= rhs
._sscr
; return *this; } //Assignment.
1846 virtual bool operator==(const xml_iterator
& rhs
){ return (_sscr
== rhs
._sscr
); } //True if this is equal to RHS.
1847 virtual bool operator!=(const xml_iterator
& rhs
){ return (_sscr
!= rhs
._sscr
); } //True if this is not equal to RHS.
1848 virtual bool operator<(const xml_iterator
& rhs
){ return (_sscr
< rhs
._sscr
); } //True if this subscript is less than RHS.
1849 virtual bool operator>(const xml_iterator
& rhs
){ return (_sscr
> rhs
._sscr
); } //True if this subscript is greater than RHS.
1850 virtual bool operator<=(const xml_iterator
& rhs
){ return (_sscr
<= rhs
._sscr
); } //True if this subscript is less than or equal to RHS.
1851 virtual bool operator>=(const xml_iterator
& rhs
){ return (_sscr
>= rhs
._sscr
); } //True if this subscript is greater than or equal to RHS.
1852 virtual xml_iterator
& operator++(){ _sscr
++; return *this; } //Increment the iterator (subscript).
1853 virtual xml_iterator
& operator--(){ _sscr
--; return *this; } //Decrement the iterator (subscript).
1854 virtual _Ty
& operator*() = 0; //Dereference operator.
1855 virtual _Ty
* operator->() = 0;
class xml_node; //Forward decl.

//<summary>Abstract tree walker class for xml_node::traverse().</summary>
//<remarks>
// Keeps a depth counter that push()/pop() move up and down; depth() never reports a
// negative value even when pop() has been called more often than push().
//</remarks>
class xml_tree_walker
{
protected:
	long _deep; //Current node depth.
public:
	xml_tree_walker() : _deep(0) {} //Default constructor.
	virtual ~xml_tree_walker(){} //Destructor.
public:
	//Increment node depth.
	virtual void push()
	{
		_deep += 1;
	}
	//Decrement node depth.
	virtual void pop()
	{
		_deep -= 1;
	}
	//Access node depth; anything at or below zero is reported as 0.
	virtual long depth()
	{
		if(_deep > 0) return _deep;
		return 0;
	}
public:
	//<summary>Callback when traverse on a given root node begins.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	//<remarks>Override this to implement your own custom behavior.</remarks>
	virtual bool begin(xml_node&)
	{
		return true;
	}
	//<summary>Callback for each node that is hit on traverse.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	virtual bool for_each(xml_node&) = 0;
	//<summary>Callback when traverse on a given root node ends.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	//<remarks>Override this to implement your own custom behavior.</remarks>
	virtual bool end(xml_node&)
	{
		return true;
	}
};
1888 //<summary>Provides a light-weight wrapper for manipulating xml_attribute_struct structures.</summary>
1890 // Note: xml_attribute does not create any memory for the attribute it wraps;
1891 // it only wraps a pointer to an existing xml_attribute_struct.
1895 //Internal Data Members
1897 xml_attribute_struct
* _attr
; //The internal attribute pointer.
1898 //Construction/Destruction
1900 xml_attribute() : _attr(NULL
) {} //Default constructor.
1901 xml_attribute(xml_attribute_struct
* attr
) : _attr(attr
) {} //Initializing constructor.
1902 xml_attribute(const xml_attribute
& r
) : _attr(r
._attr
) {} //Copy constructor.
1903 virtual ~xml_attribute(){} //Destructor.
1906 void attach(xml_attribute_struct
* v
){ _attr
= v
; }
1907 xml_attribute
& operator=(const xml_attribute
& r
){ _attr
= r
._attr
; return *this; } //Assign internal pointer.
1908 bool operator==(const xml_attribute
& r
){ return (_attr
== r
._attr
); } //Compare internal pointer.
1909 bool operator!=(const xml_attribute
& r
){ return (_attr
!= r
._attr
); }
1910 operator xml_attribute_struct
*(){ return _attr
; }
1911 //<summary>Cast attribute value as std::string. If not found, return empty.</summary>
1912 //<returns>The std::string attribute value, or empty.</returns>
1913 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1914 operator std::string()
1917 if(!empty() && has_value())
1919 #ifdef PUGOPT_NONSEG
1920 temp
.append(_attr
->value
,_attr
->value_size
);
1922 temp
= _attr
->value
;
1927 //<summary>Cast attribute value as integral character string. If not found, return NULL.</summary>
1928 //<returns>Integral character string attribute value, or NULL.</returns>
1929 //<remarks>Warning: Modifying this may corrupt portions of the document tree.</remarks>
1930 operator const TCHAR
*()
1932 if(empty() || !has_value()) return NULL
;
1933 return _attr
->value
;
1935 //<summary>Cast attribute value as long. If not found, return 0.</summary>
1936 //<returns>Attribute value as long, or 0.</returns>
1937 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1940 if(empty() || !has_value()) return 0;
1941 #ifdef PUGOPT_NONSEG
1942 TCHAR temp
[PUGDEF_ATTR_VALU_SIZE
];
1943 unsigned int valulen
= sizeof(temp
)-1;
1944 const unsigned int maxlen
= valulen
? min(valulen
,_attr
->value_size
) : _attr
->value_size
;
1945 _tcsncpy(temp
,_attr
->value
,maxlen
);
1947 return _tcstol(temp
,NULL
,10);
1949 return _tcstol(_attr
->value
,NULL
,10);
1952 //<summary>Cast attribute value as double. If not found, return 0.0.</summary>
1953 //<returns>Attribute value as double, or 0.0.</returns>
1954 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1957 if(empty() || !has_value()) return 0.0;
1958 #ifdef PUGOPT_NONSEG
1959 TCHAR temp
[PUGDEF_ATTR_VALU_SIZE
];
1960 unsigned int valulen
= sizeof(temp
)-1;
1961 const unsigned int maxlen
= valulen
? min(valulen
,_attr
->value_size
) : _attr
->value_size
;
1962 _tcsncpy(temp
,_attr
->value
,maxlen
);
1964 return _tcstod(temp
,0);
1966 return _tcstod(_attr
->value
,0);
1969 //<summary>Cast attribute value as bool. If not found, return false.</summary>
1970 //<returns>Attribute value as bool, or false.</returns>
1971 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1974 if(empty() || !has_value()) return false;
1977 return //Only look at first char:
1979 *(_attr
->value
) == _T('1') || //1*
1980 *(_attr
->value
) == _T('t') || //t* (true)
1981 *(_attr
->value
) == _T('T') || //T* (True|true)
1982 *(_attr
->value
) == _T('y') || //y* (yes)
1983 *(_attr
->value
) == _T('Y') //Y* (Yes|YES)
1985 ? true : false; //Return true if matches above, else false.
1988 //<summary>Set attribute to std::string.</summary>
1989 //<param name="rhs">Value std::string to set.</param>
1990 //<returns>Reference to xml_attribute.</returns>
1991 xml_attribute
& operator=(const std::string
& rhs
){ value(rhs
.c_str()); return *this; }
1992 //<summary>Set attribute to string.</summary>
1993 //<param name="rhs">Value string to set.</param>
1994 //<returns>Reference to xml_attribute.</returns>
1995 xml_attribute
& operator=(const TCHAR
* rhs
){ if(rhs
) value(rhs
); return *this; }
1996 //<summary>Set attribute to long.</summary>
1997 //<param name="rhs">Value long to set.</param>
1998 //<returns>Reference to xml_attribute.</returns>
1999 xml_attribute
& operator=(long rhs
)
2001 TCHAR temp
[32] = {0};
2002 _stprintf(temp
,_T("%ld"),rhs
);
2006 //<summary>Set attribute to double.</summary>
2007 //<param name="rhs">Value double to set.</param>
2008 //<returns>Reference to xml_attribute.</returns>
2009 xml_attribute
& operator=(double rhs
)
2011 TCHAR temp
[32] = {0};
2012 _stprintf(temp
,_T("%lf"),rhs
);
2016 //<summary>Set attribute to bool.</summary>
2017 //<param name="rhs">Value bool to set.</param>
2018 //<returns>Reference to xml_attribute.</returns>
2019 xml_attribute
& operator=(bool rhs
)
2021 value(rhs
?_T("true"):_T("false"));
2024 //<summary>Right-shift attribute value to std::string.</summary>
2025 //<param name="rhs">Reference to std::string to set.</param>
2026 //<returns>Reference to xml_attribute.</returns>
2027 xml_attribute
& operator>>(std::string
& rhs
)
2029 #ifdef PUGOPT_NONSEG
2031 rhs
.append(_attr
->value
,_attr
->value_size
);
2037 //<summary>Right-shift attribute value to long.</summary>
2038 //<param name="rhs">Reference to long to set.</param>
2039 //<returns>Reference to xml_attribute.</returns>
2040 xml_attribute
& operator>>(long& rhs
){ rhs
= (long)*this; return *this; }
2041 //<summary>Right-shift attribute value to double.</summary>
2042 //<param name="rhs">Reference to double to set.</param>
2043 //<returns>Reference to xml_attribute.</returns>
2044 xml_attribute
& operator>>(double& rhs
){ rhs
= (double)*this; return *this; }
2045 //<summary>Right-shift attribute value to bool.</summary>
2046 //<param name="rhs">Reference to bool to set.</param>
2047 //<returns>Reference to xml_attribute.</returns>
2048 xml_attribute
& operator>>(bool& rhs
){ rhs
= (bool)*this; return *this; }
2049 //<summary>Left-shift attribute value to long.</summary>
2050 //<param name="lhs">Reference to long to set.</param>
2051 //<param name="rhs">Reference to xml_attribute to read.</param>
2052 //<returns>Reference to long.</returns>
2053 friend long& operator<<(long& lhs
,xml_attribute
& rhs
){ lhs
= (long)rhs
; return lhs
; }
2054 //<summary>Left-shift attribute value to double.</summary>
2055 //<param name="lhs">Reference to double to set.</param>
2056 //<param name="rhs">Reference to xml_attribute to read.</param>
2057 //<returns>Reference to double.</returns>
2058 friend double& operator<<(double& lhs
,xml_attribute
& rhs
){ lhs
= (double)rhs
; return lhs
; }
2059 //<summary>Left-shift attribute value to bool.</summary>
2060 //<param name="lhs">Reference to bool to set.</param>
2061 //<param name="rhs">Reference to xml_attribute to read.</param>
2062 //<returns>Reference to bool.</returns>
2063 friend bool& operator<<(bool& lhs
,xml_attribute
& rhs
){ lhs
= (bool)rhs
; return lhs
; }
2064 //<summary>Left-shift long to attribute value.</summary>
2065 //<param name="lhs">Reference to xml_attribute to set.</param>
2066 //<param name="rhs">Reference to long to read.</param>
2067 //<returns>Reference to xml_attribute.</returns>
2068 friend xml_attribute
& operator<<(xml_attribute
& lhs
,const long rhs
){ lhs
= rhs
; return lhs
; }
2069 //<summary>Left-shift double to attribute value.</summary>
2070 //<param name="lhs">Reference to xml_attribute to set.</param>
2071 //<param name="rhs">Reference to double to read.</param>
2072 //<returns>Reference to xml_attribute.</returns>
2073 friend xml_attribute
& operator<<(xml_attribute
& lhs
,const double& rhs
){ lhs
= rhs
; return lhs
; }
2074 //<summary>Left-shift bool to attribute value.</summary>
2075 //<param name="lhs">Reference to xml_attribute to set.</param>
2076 //<param name="rhs">Reference to bool to read.</param>
2077 //<returns>Reference to xml_attribute.</returns>
2078 friend xml_attribute
& operator<<(xml_attribute
& lhs
,const bool& rhs
){ lhs
= rhs
; return lhs
; }
2080 bool empty(){ return (_attr
== NULL
); } //True if the internal xml_attribute_struct pointer is NULL.
2081 bool has_name(){ return (!empty() && _attr
->name
); } //True if the attribute has a name.
2082 bool has_value(){ return (!empty() && _attr
->value
); } //True if the attribute has a value.
2083 #ifdef PUGOPT_NONSEG
2084 bool has_name(const TCHAR
* name
) { return (name
&& !empty() && has_name() && _tcsncmp(_attr
->name
,name
,_attr
->name_size
)==0); } //Is named 'name'.
2085 bool has_value(const TCHAR
* value
) { return (value
&& !empty() && has_value() && _tcsncmp(_attr
->value
,value
,_attr
->value_size
)==0); } //Has value 'value'.
2087 bool has_name(const TCHAR
* name
) { return (name
&& !empty() && has_name() && _tcscmp(_attr
->name
,name
)==0); } //Is named 'name'.
2088 bool has_value(const TCHAR
* value
) { return (value
&& !empty() && has_value() && _tcscmp(_attr
->value
,value
)==0); } //Has value 'value'.
2091 const TCHAR
* name(){ return (!empty() && _attr
->name
) ? _attr
->name
: _T(""); } //Access the attribute name.
#ifdef PUGOPT_NONSEG
//<summary>Access the attribute name length (for PUGOPT_NONSEG).</summary>
//<returns>The stored 'name_size', or 0 if nothing is wrapped.</returns>
const unsigned int name_size()
{
	if(empty()) return 0;
	return _attr->name_size;
}
#endif
//<summary>Set the attribute name.</summary>
//<param name="new_name">Name to store; the copy is attempted only when this is non-null.</param>
//<returns>The result of 'strcpyinsitu' when an attribute is wrapped and 'new_name' is non-null.</returns>
//<remarks>NOTE(review): the result for an empty attribute or a NULL 'new_name' is not visible in this excerpt; confirm.</remarks>
2095 bool name(TCHAR
* new_name
) //Set the attribute name.
2097 if(!empty() && new_name
)
2098 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the stored name length is passed to 'strcpyinsitu' as well.
2099 return strcpyinsitu(&_attr
->name
,new_name
,&_attr
->name_insitu
,_attr
->name_size
);
//Other builds: only the name pointer and its in-situ flag are passed.
2101 return strcpyinsitu(&_attr
->name
,new_name
,&_attr
->name_insitu
);
2105 const TCHAR
* value(){ return (!empty()) ? _attr
->value
: _T(""); } //Access the attribute value.
#ifdef PUGOPT_NONSEG
//<summary>Access the attribute value length (for PUGOPT_NONSEG).</summary>
//<returns>The stored 'value_size', or 0 if nothing is wrapped.</returns>
const unsigned int value_size()
{
	if(empty()) return 0;
	return _attr->value_size;
}
#endif
//<summary>Set the attribute value.</summary>
//<param name="new_value">Value to store; the copy is attempted only when this is non-null.</param>
//<returns>The result of 'strcpyinsitu' when an attribute is wrapped and 'new_value' is non-null.</returns>
//<remarks>NOTE(review): the result for an empty attribute or a NULL 'new_value' is not visible in this excerpt; confirm.</remarks>
2109 bool value(const TCHAR
* new_value
) //Set the attribute value.
2111 if(!empty() && new_value
)
2112 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the stored value length is passed to 'strcpyinsitu' as well.
2113 return strcpyinsitu(&_attr
->value
,new_value
,&_attr
->value_insitu
,_attr
->value_size
);
//Other builds: only the value pointer and its in-situ flag are passed.
2115 return strcpyinsitu(&_attr
->value
,new_value
,&_attr
->value_insitu
);
2122 class xml_node
; //Forward declaration.
//<summary>Forward wrapper for any as-yet undefined class.</summary>
//<remarks>
//	Used by xml_node_iterator, and xml_attribute_iterator to assist with
//	operator->(), and operator*() mapping to xml_node and xml_attribute
//	types.
//	The wrapper owns one heap-allocated TYPE. Copy construction and copy
//	assignment duplicate that object, so two wrappers never delete the same
//	pointer.
//</remarks>
template <typename TYPE> class forward_class
{
protected:
	TYPE* _obj; //The class, internal.
public:
	forward_class() : _obj(NULL) { _obj = new TYPE(); } //Default constructor.
	forward_class(const TYPE& r) : _obj(NULL) { _obj = new TYPE(r); } //Construct from a value of the wrapped type.
	forward_class(const forward_class& r) : _obj(NULL) { _obj = new TYPE(*r._obj); } //Copy constructor (copies the wrapped object).
	//Copy assignment (copies the wrapped object).
	forward_class& operator=(const forward_class& r)
	{
		if(this != &r)
		{
			TYPE* fresh = new TYPE(*r._obj); //Allocate first so a throwing copy leaves this wrapper unchanged.
			delete _obj;
			_obj = fresh;
		}
		return *this;
	}
	virtual ~forward_class(){ delete _obj; } //Destructor.
public:
	TYPE& operator* (){ return *_obj; } //Dereference to the class.
	TYPE* operator->(){ return _obj; } //Class member selection.
	operator TYPE (){ return *_obj; } //Cast as class type.
	operator TYPE&(){ return *_obj; } //Cast as class type reference.
	operator TYPE*(){ return _obj; } //Cast as class type pointer.
};
2148 //<summary>Provides a light-weight wrapper for manipulating xml_node_struct structures.</summary>
2151 //Internal Data Members
2154 xml_node_struct
* _root
; //Pointer to node root.
2155 xml_node_struct _dummy
; //Utility.
2157 //Construction/Destruction
2160 //<summary>Default constructor.</summary>
2162 // Node root points to a dummy 'xml_node_struct' structure. Test for this
// with 'empty', which is true while the root's type is 'node_null'.
// NOTE(review): the statement that points '_root' at '_dummy' is not visible
// in this excerpt; the initializer list only sets '_root' to 0. Confirm.
2165 xml_node(): _root(0)
//Zero the embedded dummy structure.
2167 memset(&_dummy
,0,sizeof(xml_node_struct
));
//Mark the dummy as a null-typed node.
2168 _dummy
.type
= node_null
;
//The dummy is its own parent.
2169 _dummy
.parent
= &_dummy
;
//<summary>Construct a wrapper around the given 'xml_node_struct' pointer.</summary>
//<param name="p">Pointer to the node to wrap.</param>
//<remarks>'p' may be NULL, so test the result with 'empty'.</remarks>
xml_node(xml_node_struct* p): _root(p)
{
	memset(&_dummy,0,sizeof(_dummy)); //Zero the embedded dummy structure.
}
//<summary>Copy constructor.</summary>
//<param name="r">Reference to node.</param>
//<remarks>
//	Only the root pointer is assigned, so both classes now in fact point
//	to the same structure. The one exception is a source whose root is its
//	own embedded dummy: the copy then points at its own dummy instead, so it
//	never refers to storage inside 'r' after 'r' is destroyed.
//</remarks>
xml_node(const xml_node& r): _root(r._root)
{
	//Give this object a valid dummy of its own, set up like the default constructor's.
	memset(&_dummy,0,sizeof(xml_node_struct));
	_dummy.type = node_null;
	_dummy.parent = &_dummy;
	if(r._root == &r._dummy) _root = &_dummy; //Do not alias the source's embedded dummy.
}
2186 //<summary>Destructor.</summary>
//<remarks>The body is empty, so the wrapped structure is not released here.</remarks>
2187 virtual ~xml_node(){}
2189 //<summary>Attach to the given structure.</summary>
2190 //<param name="p">Pointer to node structure to wrap.</param>
2191 //<returns>Pointer to previous node structure.</returns>
//<remarks>NOTE(review): only the capture of the old root is visible in this excerpt; the reassignment and return are presumed from the summary. Confirm.</remarks>
2192 xml_node_struct
* attach(xml_node_struct
* p
)
//Remember the current root before it is replaced.
2194 xml_node_struct
* prev
= _root
;
2202 //<summary>Child node iterator.</summary>
//<remarks>
//	Iterates the 'child' array of the node structure held in '_vref', using
//	'_sscr' as the subscript. Dereferencing re-attaches the internal '_wrap'
//	object to the current child and hands that same object back.
//</remarks>
2203 class xml_node_iterator
: public xml_iterator
<xml_node
,long,xml_node
*,xml_node
&>
2206 forward_class
<xml_node
> _wrap
; //Wrapper for xml_node.
2208 xml_node_iterator() : _wrap(), xml_iterator
<xml_node
,long,xml_node
*,xml_node
&>() {} //Default constructor.
//'vref' is the node whose children are iterated; 'sscr' is the starting subscript.
2209 xml_node_iterator(xml_node_struct
* vref
,long sscr
= 0) : _wrap(), xml_iterator
<xml_node
,long,xml_node
*,xml_node
&>(vref
,sscr
) { } //Initializing constructor.
//The copy default-constructs its own '_wrap'; only the base-class state is taken from 'r'.
2210 xml_node_iterator(const xml_node_iterator
& r
) : _wrap(), xml_iterator
<xml_node
,long,xml_node
*,xml_node
&>(r
) { } //Copy constructor.
2211 virtual bool good() //Internal validity.
2215 _vref
!= 0 && //Pointing to some node.
2216 _vref
->child
!= 0 && //The node has an array of children.
2217 _vref
->children
> 0 //There are 1 or more children in the array.
2222 virtual bool oob() //Out of bounds check.
2226 !good() || //There is no data over which to iterate.
2227 _sscr
< 0 || //Subscript is out of range.
2228 _sscr
>= (long)_vref
->children
2233 //<summary>Pointer dereference for current xml_node.</summary>
2235 // Reference to the internal xml_node object, which wraps the
2236 // xml_node_struct corresponding to the node at the
2237 // current subscript.
// When the subscript is out of bounds the internal object is attached to NULL.
2239 virtual xml_node
& operator*()
2241 if(!oob()) _wrap
->attach(_vref
->child
[_sscr
]);
2242 else _wrap
->attach(NULL
);
2243 return (xml_node
&)_wrap
;
//Same re-attachment as operator*(), but yields a pointer to the internal xml_node object.
2245 virtual xml_node
* operator->() //Member selection for current xml_node.
2247 if(!oob()) _wrap
->attach(_vref
->child
[_sscr
]);
2248 else _wrap
->attach(NULL
);
2249 return (xml_node
*)_wrap
;
2253 //<summary>Attribute iterator.</summary>
//<remarks>
//	Iterates the 'attribute' array of the node structure held in '_vref',
//	using '_sscr' as the subscript. Dereferencing re-attaches the internal
//	'_wrap' object to the current attribute and hands that same object back.
//</remarks>
2254 class xml_attribute_iterator
: public xml_iterator
<xml_attribute
,long,xml_attribute
*,xml_attribute
&>
2257 forward_class
<xml_attribute
> _wrap
; //Wrapper for xml_attribute.
2259 xml_attribute_iterator() : _wrap(), xml_iterator
<xml_attribute
,long,xml_attribute
*,xml_attribute
&>() {} //Default constructor.
//'vref' is the node whose attributes are iterated; 'sscr' is the starting subscript.
2260 xml_attribute_iterator(xml_node_struct
* vref
,long sscr
= 0) : _wrap(), xml_iterator
<xml_attribute
,long,xml_attribute
*,xml_attribute
&>(vref
,sscr
) { } //Initializing constructor.
//The copy default-constructs its own '_wrap'; only the base-class state is taken from 'r'.
2261 xml_attribute_iterator(const xml_attribute_iterator
& r
) : _wrap(), xml_iterator
<xml_attribute
,long,xml_attribute
*,xml_attribute
&>(r
) { } //Copy constructor.
2262 virtual bool good() //Internal validity check.
2266 _vref
!= 0 && //Pointing to some node.
2267 _vref
->attribute
!= 0 && //The node has an array of attributes.
2268 _vref
->attributes
> 0 //There are 1 or more attributes in the array.
2273 virtual bool oob() //Out of bounds check.
2277 !good() || //There is no data over which to iterate.
2278 _sscr
< 0 || //Subscript is out of range.
2279 _sscr
>= (long)_vref
->attributes
//For 'end': a subscript equal to the attribute count is out of bounds.
2284 //<summary>Pointer dereference for current xml_attribute.</summary>
2286 // Reference to the internal xml_attribute object, which wraps the
2287 // xml_attribute_struct corresponding to the attribute at the
2288 // current subscript.
// When the subscript is out of bounds the internal object is attached to NULL.
2290 virtual xml_attribute
& operator*()
2292 if(!oob()) _wrap
->attach(_vref
->attribute
[_sscr
]);
2293 else _wrap
->attach(NULL
);
2294 return (xml_attribute
&)_wrap
;
2296 //<summary>Member selection for current xml_attribute.</summary>
2297 //<returns>Pointer to the internal xml_attribute object, re-attached as in operator*().</returns>
2298 virtual xml_attribute
* operator->()
2300 if(!oob()) _wrap
->attach(_vref
->attribute
[_sscr
]);
2301 else _wrap
->attach(NULL
);
2302 return (xml_attribute
*)_wrap
;
2306 //<summary>Base iterator type (for child nodes). Same as 'child_iterator'.</summary>
2307 typedef xml_node_iterator iterator
;
2308 //<summary>Base iterator type (for child nodes). Same as 'iterator'.</summary>
2309 typedef xml_node_iterator child_iterator
;
2310 //<summary>Base iterator type (for sibling nodes). Same as 'iterator'.</summary>
2311 typedef xml_node_iterator sibling_iterator
;
2312 //<summary>Attribute iterator type.</summary>
2313 typedef xml_attribute_iterator attribute_iterator
;
2315 //<summary>Access the begin iterator for this node's collection of child nodes.</summary>
2316 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2317 //<remarks>Same as 'children_begin'.</remarks>
2318 iterator
begin(){ return iterator(_root
,0); }
2319 //<summary>Access the end iterator for this node's collection of child nodes.</summary>
2320 //<returns>The end iterator for this node's collection of child nodes.</returns>
2321 //<remarks>Same as 'children_end'.</remarks>
2322 iterator
end(){ return iterator(_root
,_root
->children
); }
2323 //<summary>Erase the given node from node's collection of child nodes.</summary>
2324 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2325 //<remarks>Same as 'children_erase'.</remarks>
2326 iterator
erase(iterator where
){ remove_child((unsigned int)where
.subscript()); return iterator(_root
,0); }
2328 //<summary>Access the begin iterator for this node's collection of child nodes.</summary>
2329 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2330 //<remarks>Same as 'begin'.</remarks>
2331 child_iterator
children_begin(){ return child_iterator(_root
,0); }
2332 //<summary>Access the end iterator for this node's collection of child nodes.</summary>
2333 //<returns>The end iterator for this node's collection of child nodes.</returns>
2334 //<remarks>Same as 'end'.</remarks>
2335 child_iterator
children_end(){ return child_iterator(_root
,_root
->children
); }
2336 //<summary>Erase the given node from node's collection of child nodes.</summary>
2337 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2338 //<remarks>Same as 'erase'.</remarks>
2339 child_iterator
children_erase(child_iterator where
){ remove_child((unsigned int)where
.subscript()); return child_iterator(_root
,0); }
2341 //<summary>Access the begin iterator for this node's collection of attributes.</summary>
2342 //<returns>The begin iterator for this node's collection of attributes.</returns>
2343 attribute_iterator
attributes_begin(){ return attribute_iterator(_root
,0); }
2344 //<summary>Access the end iterator for this node's collection of attributes.</summary>
2345 //<returns>The end iterator for this node's collection of attributes.</returns>
2346 attribute_iterator
attributes_end(){ return attribute_iterator(_root
,_root
->attributes
); }
2347 //<summary>Erase the given attribute from node's collection of attributes.</summary>
2348 //<returns>The begin iterator for this node's collection of attributes.</returns>
2349 attribute_iterator
attributes_erase(attribute_iterator where
){ remove_attribute((unsigned int)where
.subscript()); return attribute_iterator(_root
,0); }
2351 //<summary>Access the begin iterator for this node's collection of siblings.</summary>
2352 //<returns>The begin iterator for this node's collection of siblings.</returns>
2353 sibling_iterator
siblings_begin(){ if(!empty()) return sibling_iterator(_root
->parent
,0); return sibling_iterator(); }
2354 //<summary>Access the end iterator for this node's collection of siblings.</summary>
2355 //<returns>The end iterator for this node's collection of siblings.</returns>
2356 sibling_iterator
siblings_end(){ if(!empty()) return sibling_iterator(_root
->parent
,_root
->parent
->children
); return sibling_iterator(); }
2357 //<summary>Erase the given sibling from node's collection of siblings.</summary>
2358 //<returns>The begin iterator for this node's collection of siblings.</returns>
2359 sibling_iterator
siblings_erase(sibling_iterator where
){ parent().remove_child((unsigned int)where
.subscript()); return iterator(_root
->parent
,0); }
2361 //Overloaded Operators
2364 operator xml_node_struct
*(){ return _root
; } //Cast as xml_node_struct pointer.
2365 operator void*(){ return (void*)_root
; } //Cast root as void*.
//<summary>Assign the root pointer of another node wrapper.</summary>
//<param name="r">Node to take the root pointer from.</param>
//<returns>Reference to this node.</returns>
//<remarks>
//	If 'r' is rooted at its own embedded dummy, this node is rooted at its
//	own dummy instead, so it never refers to storage inside 'r'.
//</remarks>
xml_node& operator=(const xml_node& r)
{
	if(r._root == &r._dummy)
	{
		//Set up this object's dummy the way the default constructor does, then use it.
		memset(&_dummy,0,sizeof(xml_node_struct));
		_dummy.type = node_null;
		_dummy.parent = &_dummy;
		_root = &_dummy;
	}
	else _root = r._root;
	return *this;
}
2367 bool operator==(const xml_node
& r
){ return (_root
== r
._root
); } //True if this has the same internal xml_node_struct pointer value.
2368 xml_node
operator[](unsigned int i
){ return child(i
); } //Access the child at subscript.
2370 //Node Classification
2373 bool empty() { return (_root
== 0 || _root
->type
== node_null
); } //Node pointer is null, or type is node_null. Same as type_null.
2374 bool type_null() { return empty(); } //Node pointer is null, or type is node_null. Same as empty.
2375 bool type_document() { return (_root
&& _root
== _root
->parent
&& _root
->type
== node_document
); } //Node is tree root.
2376 bool type_element() { return (_root
&& _root
->type
== node_element
); } //Node is an element.
2377 bool type_comment() { return (_root
&& _root
->type
== node_comment
); } //Node is a comment.
2378 bool type_pcdata() { return (_root
&& _root
->type
== node_pcdata
); } //Node is PCDATA.
2379 bool type_cdata() { return (_root
&& _root
->type
== node_cdata
); } //Node is CDATA.
2380 bool type_include() { return (_root
&& _root
->type
== node_include
); } //Node is INCLUDE.
2381 bool type_pi() { return (_root
&& _root
->type
== node_pi
); } //Node is a processing instruction.
2382 bool type_doctype() { return (_root
&& _root
->type
== node_doctype
); } //Node is DOCTYPE.
2383 bool type_dtd_item() { return (_root
&& _root
->type
> node_doctype
); } //Node is NODE_DTD_*.
2384 bool type_dtd_attlist() { return (_root
&& _root
->type
== node_dtd_attlist
); } //Node is node_dtd_attlist.
2385 bool type_dtd_element() { return (_root
&& _root
->type
== node_dtd_element
); } //Node is node_dtd_element.
2386 bool type_dtd_entity() { return (_root
&& _root
->type
== node_dtd_entity
); } //Node is node_dtd_entity.
2387 bool type_dtd_notation() { return (_root
&& _root
->type
== node_dtd_notation
); } //Node is node_dtd_notation.
2392 bool has_value() { return (!empty() && _root
->value
!= 0); } //Node has data (comment, CDATA or PCDATA).
2393 bool has_child_nodes() { return (!empty() && children() > 0); } //Node has 1 or more children.
2394 bool has_attributes() { return (!empty() && attributes() > 0); } //Node has 1 or more attributes.
2395 bool has_siblings() { return (!empty() && siblings() > 0); } //Node has one or more siblings.
2396 bool has_name() { return (!empty() && _root
->name
!= 0); } //Node has a name.
2397 bool has_name(const std::string
& name
) const { return has_name(name
.c_str()); } //Node is named 'name'.
2398 bool has_attribute(const std::string
& name
) { return has_attribute(name
.c_str()); } //Node has an attribute named 'name'.
#ifdef PUGOPT_NONSEG
//<summary>Test whether the node is named 'name' (PUGOPT_NONSEG build).</summary>
//<param name="name">Null-terminated name to compare with; may be NULL.</param>
//<returns>True only if 'name' is exactly 'name_size' characters long and all of them match.</returns>
//<remarks>The length test stops a longer 'name' from matching on a shared prefix.</remarks>
bool has_name(const TCHAR* name) const
{
	return (name && _root && _root->name
		&& _tcslen(name) == _root->name_size
		&& _tcsncmp(_root->name,name,_root->name_size)==0);
}
#else
//<summary>Test whether the node is named 'name'.</summary>
//<param name="name">Null-terminated name passed to 'strcmpwild' together with the node name; may be NULL.</param>
//<returns>True if the node has a name and 'strcmpwild' returns 0 for it.</returns>
bool has_name(const TCHAR* name) const
{
	return (name && _root && _root->name && strcmpwild(name,_root->name)==0);
}
#endif
2404 bool has_attribute(const TCHAR
* name
){ return (mapto_attribute_idx(name
) > -1); } //Node has an attribute named name.
2409 #ifdef PUGOPT_NONSEG
2411 //<summary>Access node name if any.</summary>
2412 //<returns>Name, or dummy value if there is no name.</returns>
2413 //<remarks>Only returns up to 'PUGDEF_ELEM_NAME_SIZE' chars of name.</remarks>
//The buffer is function-static, so every call writes into the same storage.
2416 static TCHAR temp
[PUGDEF_ELEM_NAME_SIZE
] = {0};
//NOTE(review): the copy length is 'name_size' and is not capped at the buffer size here;
//only the terminator index below is capped. Confirm names cannot exceed the buffer.
2419 _tcsncpy(temp
,_root
->name
,_root
->name_size
);
//Terminate at 'name_size', or at the last buffer slot when the name is at least as long as the buffer.
2420 temp
[_root
->name_size
<PUGDEF_ELEM_NAME_SIZE
?_root
->name_size
:(PUGDEF_ELEM_NAME_SIZE
-1)] = 0;
2425 unsigned int name_size(){ return (has_name()) ? _root
->name_size
: 0; } //Get node name length if any, else 0.
2426 unsigned int value_size(){ return (has_value()) ? _root
->value_size
: 0; } //Get node value length if any, else 0.
2427 inline bool matches_attribute_name(const TCHAR
* name
,const unsigned int namelen
,const int i
) const { return (_tcsncmp(name
,_root
->attribute
[i
]->name
,max(namelen
,_root
->attribute
[i
]->name_size
))==0); } //There is an attribute at 'i' named 'name'.
2428 inline bool matches_child_name(const TCHAR
* name
,const unsigned int namelen
,const int i
) const { return (_tcsncmp(name
,_root
->child
[i
]->name
,max(namelen
,_root
->child
[i
]->name_size
))==0); } //There is a child at 'i' named 'name'.
2429 inline bool matches_name(const TCHAR
* name
,const unsigned int namelen
,xml_node_struct
* node
) const { return (_tcsncmp(name
,node
->name
,max(namelen
,node
->name_size
))==0); } //This is named 'name'.
2430 inline bool matches_value(const TCHAR
* data
,const unsigned int datalen
,xml_node_struct
* node
) const { return (_tcsncmp(data
,node
->value
,max(datalen
,node
->value_size
))==0); } //This is valued 'value'.
2431 inline bool matches_attribute_name(const TCHAR
* name
,const unsigned int namelen
,xml_attribute_struct
* attr
) const { return (_tcsncmp(name
,attr
->name
,max(namelen
,attr
->name_size
))==0); } //The given attribute is named 'name'.
2432 inline bool matches_attribute_name_value(const TCHAR
* value
,const unsigned int valulen
,xml_attribute_struct
* attr
) const { return (_tcsncmp(value
,attr
->value
,max(valulen
,attr
->value_size
))==0); } //The given attribute is valued 'value'.
2434 const TCHAR
* name(){ return (has_name()) ? _root
->name
: _T(""); } //Access pointer to node name if any, else empty string.
2435 inline bool matches_attribute_name(const TCHAR
* name
,const unsigned int i
) const { return (strcmpwild(name
,_root
->attribute
[i
]->name
)==0); } //There is an attribute at 'i' named 'name'.
2436 inline bool matches_child_name(const TCHAR
* name
,const unsigned int i
) const { return (strcmpwild(name
,_root
->child
[i
]->name
)==0); } //There is a child at 'i' named 'name'.
2437 inline bool matches_name(const TCHAR
* name
,xml_node_struct
* node
) const { return (strcmpwild(name
,node
->name
)==0); } //This is named 'name'.
2438 inline bool matches_value(const TCHAR
* data
,xml_node_struct
* node
) const { return (strcmpwild(data
,node
->value
)==0); } //This is valued 'value'.
2439 inline bool matches_attribute_name(const TCHAR
* attribute
,xml_attribute_struct
* attr
) const { return (strcmpwild(attribute
,attr
->name
)==0); } //The given attribute is named 'name'.
2440 inline bool matches_attribute_name_value(const TCHAR
* value
,xml_attribute_struct
* attr
) const { return (strcmpwild(value
,attr
->value
)==0); } //The given attribute is valued 'value'.
2442 xml_node_type
type() const { return (_root
) ? (xml_node_type
)_root
->type
: node_null
; } //Access node entity type.
2443 const TCHAR
* value() { return (has_value()) ? _root
->value
: _T(""); } //Access pointer to data if any, else empty string.
2444 unsigned int children() const { return _root
->children
; } //Access node's child count.
2445 xml_node
child(unsigned int i
){ return (i
< children()) ? xml_node(_root
->child
[i
]) : xml_node(); } //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
2446 unsigned int attributes() const { return _root
->attributes
; } //Access node's attribute count.
2447 xml_attribute
attribute(unsigned int i
){ return (i
< attributes()) ? xml_attribute(_root
->attribute
[i
]) : xml_attribute(); } //Access attribute at subscript if any, else empty attribute.
//<summary>Access or create the attribute having 'name'.</summary>
//<param name="name">Name of attribute to access/create.</param>
//<returns>The xml_attribute produced by the 'const TCHAR*' overload for 'name.c_str()'.</returns>
xml_attribute attribute(const std::string& name)
{
	return attribute(name.c_str());
}
2452 //<summary>Access or create the attribute having 'name'.</summary>
2453 //<param name="name">Name of attribute to access/create.</param>
2454 //<returns>xml_attribute wrapper (returned by value) around the found or newly appended attribute.</returns>
2455 xml_attribute
attribute(const TCHAR
* name
)
//Look for an existing attribute with this name first.
2457 xml_attribute_struct
* attr
= mapto_attribute_ptr(name
);
//None found: append a new attribute with an empty value.
2458 if(!attr
) attr
= append_attribute(name
,_T(""));
2459 return xml_attribute(attr
);
2461 const unsigned int siblings(){ return (!type_document()) ? _root
->parent
->children
: 0; } //Access node's sibling count (parent's child count).
2462 xml_node
sibling(unsigned int i
){ return (!type_document() && i
< siblings()) ? xml_node(_root
->parent
->child
[i
]) : xml_node(); } //Access sibling node at subscript as xml_node or xml_node(NULL) if bad subscript.
2463 xml_node
parent(){ return (!type_document()) ? xml_node(_root
->parent
) : xml_node(); } //Access node's parent if any, else xml_node(NULL)
2465 //<summary>Copy the data of a child node into the caller's buffer.</summary>
2466 //<param name="value">Buffer that receives a copy of the data.</param>
2467 //<param name="valuelen">Specifies the maximum number of characters to copy into value.</param>
2468 //<returns>Pointer to value if exists, else NULL.</returns>
2470 // Used to get the PCDATA for the current element. This handles elements
2471 // like: <LINE><STAGEDIR>Aside</STAGEDIR>Thy father,
2472 // Pompey, would ne'er have</LINE>, where 'this' points to <LINE>.
// NOTE(review): the test that selects which child is copied, and the return
// statements, are not visible in this excerpt; confirm against the full body.
2474 TCHAR
* child_value(TCHAR
* value
,const unsigned int valuelen
)const
//Walk the children in order.
2478 for(register unsigned int i
=0; i
< _root
->children
; ++i
)
2480 xml_node_struct
* node
= _root
->child
[i
];
//Number of characters to copy: never more than 'valuelen'.
2483 const unsigned int n
=
2484 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the data length comes from the stored 'value_size'.
2485 (std::min
)(valuelen
,node
->value_size
);
//Other builds: the data length is measured with '_tcslen'.
2487 (std::min
)(valuelen
,unsigned(_tcslen(node
->value
)));
//NOTE(review): '_tcsncpy' adds no terminator when it copies exactly 'n' characters; confirm the buffer is terminated afterwards.
2489 _tcsncpy(value
,node
->value
,n
);
2499 //Name-To-Object Mapping
//<summary>Map an attribute name to a pointer to that attribute, if found.</summary>
//<param name="name">Name of the attribute to find.</param>
//<returns>The result of the 'const TCHAR*' overload for 'name.c_str()'.</returns>
//<remarks>Implement your own hash table if you have a great many attributes.</remarks>
xml_attribute_struct* mapto_attribute_ptr(const std::string& name)
{
	return mapto_attribute_ptr(name.c_str());
}
2508 //<summary>Map an attribute name to a pointer to that attribute, if found.</summary>
2509 //<param name="name">Pointer to name of attribute to find.</param>
2510 //<returns>Pointer to the first matching attribute; NULL when no structure is wrapped or 'name' is NULL.</returns>
2511 //<remarks>Implement your own hash table if you have a great many attributes.</remarks>
//<remarks>NOTE(review): the return for "no match" is not visible in this excerpt; presumably NULL.</remarks>
2512 xml_attribute_struct
* mapto_attribute_ptr(const TCHAR
* name
)
2514 if(!_root
|| !name
) return NULL
;
//Read the attribute count once, before the scan.
2515 register unsigned int n
= _root
->attributes
;
2516 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2517 const int namelen
= _tcslen(name
);
//Linear scan over the attribute array; the first match wins.
2519 for(register unsigned int i
=0; i
<n
; ++i
)
2520 #ifdef PUGOPT_NONSEG
2521 if(matches_attribute_name(name
,namelen
,i
))
2523 if(matches_attribute_name(name
,i
))
2525 return _root
->attribute
[i
];
2529 //<summary>Map an attribute name to the index of that attribute, if found.</summary>
2530 //<param name="name">Pointer to name of attribute to find.</param>
2531 //<returns>Index of attribute, or -1 if not found; also -1 when no structure is wrapped or 'name' is NULL.</returns>
2532 //<remarks>Implement your own hash table if you have a great many attributes.</remarks>
//<remarks>NOTE(review): the statements that return the index and the "not found" result are not visible in this excerpt.</remarks>
2533 int mapto_attribute_idx(const TCHAR
* name
)
2535 if(!_root
|| !name
) return -1;
//Read the attribute count once, before the scan.
2536 register unsigned int n
= _root
->attributes
;
2537 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2538 const int namelen
= _tcslen(name
);
//Linear scan over the attribute array.
2540 for(register unsigned int i
=0; i
<n
; ++i
)
2541 #ifdef PUGOPT_NONSEG
2542 if(matches_attribute_name(name
,namelen
,i
))
2544 if(matches_attribute_name(name
,i
))
//<summary>Map a child name to a pointer to the first instance, if found.</summary>
//<param name="name">Name of the child to find.</param>
//<returns>The pointer produced by the 'const TCHAR*' overload for 'name.c_str()'.</returns>
//<remarks>Implement your own hash table if you have a great many children.</remarks>
xml_node_struct* mapto_child_ptr(const std::string& name)
{
	return mapto_child_ptr(name.c_str());
}
2556 //<summary>Map a child name to a pointer to the first instance, if found.</summary>
2557 //<param name="name">Pointer to name of child to find.</param>
2558 //<returns>Pointer to the first matching child; NULL when no structure is wrapped or 'name' is NULL.</returns>
2559 //<remarks>Implement your own hash table if you have a great many children.</remarks>
//<remarks>NOTE(review): the return for "no match" is not visible in this excerpt; presumably NULL.</remarks>
2560 xml_node_struct
* mapto_child_ptr(const TCHAR
* name
)
2562 if(!_root
|| !name
) return NULL
;
//Read the child count once, before the scan.
2563 register unsigned int n
= _root
->children
;
2564 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2565 const int namelen
= _tcslen(name
);
//Linear scan over the child array; the first match wins.
2567 for(register unsigned int i
=0; i
<n
; ++i
)
//Only children that have a name are compared.
2571 _root
->child
[i
]->name
&&
2572 #ifdef PUGOPT_NONSEG
2573 matches_child_name(name
,namelen
,i
)
2575 matches_child_name(name
,i
)
2578 return _root
->child
[i
];
//<summary>Map a child name to the index of the first instance, if found.</summary>
//<param name="name">Name of the child to find.</param>
//<returns>The index produced by the 'const TCHAR*' overload for 'name.c_str()'.</returns>
//<remarks>Implement your own hash table if you have a great many children.</remarks>
int mapto_child_idx(const std::string& name)
{
	return mapto_child_idx(name.c_str());
}
2589 //<summary>Map a child name to the index of the first instance, if found.</summary>
2590 //<param name="name">Pointer to name of child to find.</param>
2591 //<returns>Index of child, or -1 if not found; also -1 when no structure is wrapped or 'name' is NULL.</returns>
2592 //<remarks>Implement your own hash table if you have a great many children.</remarks>
//<remarks>NOTE(review): the statements that return the index and the "not found" result are not visible in this excerpt.</remarks>
2593 int mapto_child_idx(const TCHAR
* name
)
2595 if(!_root
|| !name
) return -1;
//Read the child count once, before the scan.
2596 register unsigned int n
= _root
->children
;
2597 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2598 const int namelen
= _tcslen(name
);
//Linear scan over the child array.
2600 for(register unsigned int i
=0; i
<n
; ++i
)
//Only children that have a name are compared.
2604 _root
->child
[i
]->name
&&
2605 #ifdef PUGOPT_NONSEG
2606 matches_child_name(name
,namelen
,i
)
2608 matches_child_name(name
,i
)
//<summary>Find all elements having the given name.</summary>
//<param name="name">Name of the elements to find.</param>
//<param name="found">Array that receives the matching elements.</param>
//<remarks>Forwards 'name.c_str()' and 'found' to the 'const TCHAR*' overload.</remarks>
void all_elements_by_name(const std::string& name,pointer_array& found)
{
	all_elements_by_name(name.c_str(),found);
}
2624 //<summary>Find all elements having the given name.</summary>
2625 //<param name="name">Pointer to name of child to find.</param>
2626 //<param name="found">Reference to xml_node_list or pointer_array to receive the matching elements.</param>
//<remarks>
//	Matching children are appended to 'found', and the search recurses into
//	every child that has children of its own. Nothing is done for an empty
//	node or a NULL 'name'.
//</remarks>
2627 void all_elements_by_name(const TCHAR
* name
,pointer_array
& found
)
2629 if(empty() || !name
) return; //Invalid node, so fail.
2630 if(_root
->children
> 0) //Has children.
2632 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2633 const unsigned int namelen
= _tcslen(name
);
2635 register unsigned int n
= _root
->children
; //For each child.
2636 for(register unsigned int i
=0; i
<n
; ++i
)
2640 _root
->child
[i
] && //There is a child at i.
2641 _root
->child
[i
]->name
&& //The child has a name.
2642 #ifdef PUGOPT_NONSEG
2643 matches_child_name(name
,namelen
,i
)
2645 matches_child_name(name
,i
)
2648 found
.push_back(_root
->child
[i
]); //push_back it to the array.
2649 if(_root
->child
[i
]->children
) //If there are children.
2651 xml_node
subsearch(_root
->child
[i
]); //Wrap it up for ease.
2652 subsearch
.all_elements_by_name(name
,found
); //Find any matching children.
//<summary>
//	Recursively-implemented depth-first find the first matching element.
//	Use for shallow drill-downs.
//</summary>
//<param name="name">Name of the element to find.</param>
//<returns>The xml_node produced by the 'const TCHAR*' overload for 'name.c_str()'.</returns>
//<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
xml_node first_element_by_name(const std::string& name)
{
	return first_element_by_name(name.c_str());
}
2668 // Recursively-implemented depth-first find the first matching element.
2669 // Use for shallow drill-downs.
2671 //<param name="name">Pointer to name of element to find.</param>
2672 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2673 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
//<remarks>
//	Each child is tested in order. A matching child is returned at once;
//	a non-matching child that has children is searched recursively before
//	the next sibling is examined.
//</remarks>
2674 xml_node
first_element_by_name(const TCHAR
* name
)
2676 if(empty() || !name
) return xml_node(); //Invalid node, so fail.
2677 if(_root
->children
> 0) //Has children.
2679 register unsigned int n
= _root
->children
; //For each child.
2680 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: the query length is needed by the length-aware matcher.
2681 const int namelen
= _tcslen(name
);
2683 for(register unsigned int i
=0; i
<n
; ++i
)
//Only children that have a name are compared.
2687 _root
->child
[i
]->name
&&
2688 #ifdef PUGOPT_NONSEG
2689 matches_child_name(name
,namelen
,i
)
2691 matches_child_name(name
,i
)
//Direct match: wrap the child and return it.
2694 return xml_node(_root
->child
[i
]);
//No direct match: descend if this child has children.
2695 else if(_root
->child
[i
]->children
)
2697 xml_node
subsearch(_root
->child
[i
]); //Wrap it up for ease.
2698 xml_node found
= subsearch
.first_element_by_name(name
);
2699 if(!found
.empty()) return found
; //Found.
2703 return xml_node(); //Not found.
//<summary>
//	Recursively-implemented depth-first find the first matching element
//	also having matching PCDATA.
//</summary>
//<param name="name">Name of the element to find.</param>
//<param name="value">PCDATA to find.</param>
//<returns>The xml_node produced by the 'const TCHAR*' overload for 'name.c_str()' and 'value.c_str()'.</returns>
//<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
xml_node first_element_by_value(const std::string& name,const std::string& value)
{
	return first_element_by_value(name.c_str(),value.c_str());
}
2717 // Recursively-implemented depth-first find the first matching element
2718 // also having matching PCDATA.
2720 //<param name="name">Pointer to name of element to find.</param>
2721 //<param name="value">Pointer to PCDATA to find.</param>
2722 //<returns>Valid xml_node if such element named 'name' is found with PCDATA 'value'.</returns>
2723 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
//<remarks>
//	For a child whose name matches, its own children are scanned for a
//	PCDATA node whose data matches 'value'; the matching child (not the
//	PCDATA node) is returned. Children that do not match by name are
//	searched recursively when they have children.
//</remarks>
2724 xml_node
first_element_by_value(const TCHAR
* name
,const TCHAR
* value
)
2726 if(empty() || !name
|| !value
) return xml_node(); //Invalid node, so fail.
2727 if(_root
->children
> 0) //Has children.
2729 register unsigned int n
= _root
->children
; //For each child.
2730 #ifdef PUGOPT_NONSEG
//PUGOPT_NONSEG build: both query lengths are needed by the length-aware matchers.
2731 const unsigned int namelen
= _tcslen(name
);
2732 const unsigned int valulen
= _tcslen(value
);
2734 for(register unsigned int i
=0; i
<n
; ++i
)
2738 _root
->child
[i
] && //There is a child at i.
2739 _root
->child
[i
]->name
&& //The child has a name.
2740 #ifdef PUGOPT_NONSEG
2741 matches_child_name(name
,namelen
,i
)
2743 matches_child_name(name
,i
)
2747 register unsigned int m
= _root
->child
[i
]->children
; //For each child of child.
2748 for(register unsigned int j
=0; j
<m
; ++j
)
2752 _root
->child
[i
]->child
[j
] && //There is a child at j.
2753 _root
->child
[i
]->child
[j
]->type
== node_pcdata
&& //It is of the PCDATA type.
2754 _root
->child
[i
]->child
[j
]->value
&& //It has data.
2755 #ifdef PUGOPT_NONSEG
2756 matches_value(value
,valulen
,_root
->child
[i
]->child
[j
])
2758 matches_value(value
,_root
->child
[i
]->child
[j
])
2761 return xml_node(_root
->child
[i
]); //Wrap it up and return.
2764 else if(_root
->child
[i
] && _root
->child
[i
]->children
) //The child has children.
2766 xml_node
subsearch(_root
->child
[i
]); //Wrap it up for ease.
2767 xml_node found
= subsearch
.first_element_by_value(name
,value
); //Search any children.
2768 if(!found
.empty()) return found
; //Found.
2772 return xml_node(); //Not found.
//<summary>
//	Recursively-implemented depth-first find the first matching element
//	also having matching attribute.
//</summary>
//<param name="name">Name of the element to find.</param>
//<param name="attr_name">Name of the attribute to find.</param>
//<param name="attr_value">Attribute value to find.</param>
//<returns>The xml_node produced by the 'const TCHAR*' overload for the three C strings.</returns>
//<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
xml_node first_element_by_attribute(const std::string& name,const std::string& attr_name,const std::string& attr_value)
{
	return first_element_by_attribute(name.c_str(),attr_name.c_str(),attr_value.c_str());
}
2787 // Recursively-implemented depth-first find the first matching element
2788 // also having matching attribute.
2790 //<param name="name">Pointer to name of element to find.</param>
2791 //<param name="attr_name">Pointer to name of attribute to find.</param>
2792 //<param name="attr_value">Pointer to attribute value to find.</param>
2793 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2794 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2795 xml_node
first_element_by_attribute(const TCHAR
* name
,const TCHAR
* attr_name
,const TCHAR
* attr_value
)
2797 if(empty() || !name
|| !attr_name
|| !attr_value
) return xml_node(); //Invalid data, so fail.
2798 if(_root
->children
> 0) //Has children.
2800 #ifdef PUGOPT_NONSEG
2801 const unsigned int namelen
= _tcslen(name
);
2802 const unsigned int attrlen
= _tcslen(attr_name
);
2803 const unsigned int valulen
= _tcslen(attr_value
);
2805 register unsigned int n
= _root
->children
; //For each child.
2806 for(register unsigned int i
=0; i
<n
; ++i
)
2810 _root
->child
[i
] && //There is a child at i.
2811 _root
->child
[i
]->name
&& //The child has a name.
2812 #ifdef PUGOPT_NONSEG
2813 matches_name(name
,namelen
,_root
->child
[i
])
2815 matches_name(name
,_root
->child
[i
])
2819 register unsigned int m
= _root
->child
[i
]->attributes
; //For each attribute of child.
2820 for(register unsigned int j
=0; j
<m
; ++j
)
2824 _root
->child
[i
]->attribute
[j
] && //There is an attribute at j.
2825 _root
->child
[i
]->attribute
[j
]->name
&& //The attribute has a name.
2826 #ifdef PUGOPT_NONSEG
2827 matches_attribute_name(attr_name
,attrlen
,_root
->child
[i
]->attribute
[j
]) &&
2829 matches_attribute_name(attr_name
,_root
->child
[i
]->attribute
[j
]) &&
2831 _root
->child
[i
]->attribute
[j
]->value
&& //The attribute has a value.
2832 #ifdef PUGOPT_NONSEG
2833 matches_attribute_name_value(attr_value
,valulen
,_root
->child
[i
]->attribute
[j
])
2835 matches_attribute_name_value(attr_value
,_root
->child
[i
]->attribute
[j
])
2838 return xml_node(_root
->child
[i
]); //Wrap it up and return.
2841 else if(_root
->child
[i
] && _root
->child
[i
]->children
)
2843 xml_node
subsearch(_root
->child
[i
]); //Wrap it up for ease.
2844 xml_node found
= subsearch
.first_element_by_attribute(name
,attr_name
,attr_value
); //Search any children.
2845 if(!found
.empty()) return found
; //Found.
2849 return xml_node(); //Not found.
2853 // Recursively-implemented depth-first find the first matching entity.
2854 // Use for shallow drill-downs.
2856 //<param name="name">Pointer to name of element to find.</param>
2857 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2858 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2859 xml_node
first_node(xml_node_type type
)
2861 if(!_root
) return xml_node();
2862 if(_root
->children
> 0) //Has children.
2864 register unsigned int n
= _root
->children
; //For each child.
2865 for(register unsigned int i
=0; i
<n
; ++i
)
2867 if(_root
->child
[i
]->type
==type
)
2868 return xml_node(_root
->child
[i
]);
2869 else if(_root
->child
[i
]->children
)
2871 xml_node
subsearch(_root
->child
[i
]);
2872 xml_node found
= subsearch
.first_node(type
);
2873 if(!found
.empty()) return found
; //Found.
2877 return xml_node(); //Not found.
2880 //<summary>Move to the absolute root of the document tree.</summary>
2881 //<returns>True if the current node is valid.</returns>
2882 //<remarks>Member '_root' may now point to absolute root of the document.</remarks>
2885 if(empty()) return false; //Nowhere to go.
2886 while(!type_document()) _root
= _root
->parent
; //Keep stepping out until we hit the root.
2887 return true; //Success.
2890 //<summary>Move to the current node's parent.</summary>
2891 //<returns>true if there is a parent and cursor is not parent, and cursor points thereto.</returns>
2892 //<remarks>'_root' may now point to parent.</remarks>
2893 bool moveto_parent()
2895 if(empty() || type_document()) return false; //Invalid, or at the root (has no parent).
2896 _root
= _root
->parent
; //Move to parent.
2897 return true; //Success.
2901 // Move to the current node's sibling at subscript. Equivalent to
2902 // 'moveto_child' following 'moveto_parent'.
2904 //<param name="i">Subscript of sibling to move cursor to.</param>
2905 //<returns>True if valid subscript, and cursor points thereto.</returns>
2906 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2907 bool moveto_sibling(unsigned int i
)
2909 if(empty()) return false; //Nowhere to go.
2910 xml_node_struct
* restore
= _root
; //Save position in case invalid subscript & we want to restore.
2911 if(moveto_parent()) //Try to move to parent.
2913 if(i
< children()) //Subscript is in range. (Assume parent *does* have children.)
2915 _root
= _root
->child
[i
]; //Move to child at subscript ('sibling').
2916 return true; //Success.
2919 _root
= restore
; //Bad subscript, or parent move; restore.
2923 //<summary>Move to the current node's first sibling matching given name.</summary>
2924 //<param name="name">Element name of sibling to move to.</param>
2925 //<returns>True if sibling was found, and cursor points thereto.</returns>
2926 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2927 bool moveto_first_sibling(const std::string
& name
){ return moveto_first_sibling(name
.c_str()); }
2929 //<summary>Move to the current node's first sibling matching given name.</summary>
2930 //<param name="name">Element name of sibling to move to.</param>
2931 //<returns>True if sibling was found, and cursor points thereto.</returns>
2932 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2933 bool moveto_first_sibling(const TCHAR
* name
)
2935 if(empty() || !name
) return false; //Nowhere to go, or nothing to find.
2936 xml_node_struct
* restore
= _root
; //Save position in case invalid subscript & we want to restore.
2937 if(moveto_parent()) //Try to move to parent.
2939 #ifdef PUGOPT_NONSEG
2940 const unsigned int namelen
= _tcslen(name
);
2942 register unsigned int n
= children(); //Search for matching name
2943 for(register unsigned int i
=0; i
<n
; ++i
)
2945 //NF 24 Jan 2003 Changed to get child(i) just once per iteration.
2946 xml_node node
= child(i
); //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
2947 if(node
.type_element()||node
.type_pi()) //Other types won't have names.
2949 #ifdef PUGOPT_NONSEG
2950 if(_tcsncmp(name
,node
.name(),max(namelen
,node
.name_size()))==0) //Do names match?
2952 if(strcmpwild(name
,node
.name())==0) //Do names match?
2955 _root
= node
; //Move there.
2956 return true; //Success.
2961 _root
= restore
; //Failed to locate any such sibling; restore position.
2965 //<summary>Move to the current node's child at subscript.</summary>
2966 //<param name="i">Subscript of child to move cursor to.</param>
2967 //<returns>true if valid subscript, and cursor points thereto.</returns>
2968 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2969 bool moveto_child(unsigned int i
)
2971 if(empty()) return false; //Null, so no children.
2972 if(has_child_nodes() && i
< children()) //Has children and subscript is in bounds.
2974 _root
= child(i
); //Move to the child at i.
2975 return true; //Success.
2977 return false; //Failure.
2980 //<summary>Move to the current node's child matching given name.</summary>
2981 //<param name="name">Element name of child to move to if found.</param>
2982 //<returns>True if child was found, and cursor points thereto.</returns>
2983 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2984 bool moveto_child(const std::string
& name
){ return moveto_child(name
.c_str()); }
2986 //<summary>Move to the current node's child matching given name.</summary>
2987 //<param name="name">Element name of child to move to if found.</param>
2988 //<returns>True if child was found, and cursor points thereto.</returns>
2989 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2990 bool moveto_child(const TCHAR
* name
)
2992 if(empty() || !name
|| !has_child_nodes()) return false; //The node is null, a name was not specified, or node has no children.
2993 #ifdef PUGOPT_NONSEG
2994 const unsigned int namelen
= _tcslen(name
);
2996 register unsigned int n
= children(); //For each child.
2997 for(register unsigned int i
=0; i
<n
; ++i
)
2999 //NF 24 Jan 2003: Changed to get child(i) just once per iteration.
3000 xml_node node
= child(i
); //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
3001 #ifdef PUGOPT_NONSEG
3002 if(_tcsncmp(name
,node
.name(),max(namelen
,node
.name_size()))==0) //Do names match?
3004 if(strcmpwild(name
,node
.name())==0) //If the name is identical with 'name'.
3007 _root
= node
; //Move to it.
3008 return true; //Success.
3011 return false; //Failure.
3014 //<summary>Move to the current node's next sibling by position and name.</summary>
3015 //<param name="name">Name of sibling to move to if found.</param>
3016 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3017 bool moveto_next_sibling(const std::string
& name
){ return moveto_next_sibling(name
.c_str()); }
3019 //<summary>Move to the current node's next sibling by position and name.</summary>
3020 //<param name="name">Name of sibling to move to if found.</param>
3021 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3022 bool moveto_next_sibling(const TCHAR
* name
)
3024 if(empty() || type_document() || !_root
->parent
|| !name
) return false; //Null, or at root, or no name, so there are no valid matches.
3025 #ifdef PUGOPT_NONSEG
3026 const unsigned int namelen
= _tcslen(name
);
3028 register unsigned int n
= _root
->parent
->children
; //For each child of parent.
3029 for(register unsigned int i
=0; i
<(n
-1); ++i
)
3033 _root
->parent
->child
[i
] && //There is a child at i.
3034 _root
->parent
->child
[i
] == _root
&& //The child is identical with this node.
3035 i
< (n
-1) //This is not the last child.
3038 for(++i
; i
<n
; ++i
) //For each following child.
3042 _root
->parent
->child
[i
] && //There is a child at i.
3043 _root
->parent
->child
[i
]->name
&& //The child's name is not null.
3044 #ifdef PUGOPT_NONSEG
3045 matches_name(name
,namelen
,_root
->parent
->child
[i
])
3047 matches_name(name
,_root
->parent
->child
[i
])
3051 moveto_sibling(i
); //Move to it.
3052 return true; //Success.
3057 return false; //Failure.
3060 //<summary>Move to the current node's next sibling by position.</summary>
3061 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3062 bool moveto_next_sibling()
3064 if(empty() || type_document() || !_root
->parent
) return false; //Null or at root, so there are no valid siblings.
3065 register unsigned int n
= _root
->parent
->children
; //For each child of parent (each sibling).
3066 for(register unsigned int i
=0; i
<(n
-1); ++i
)
3070 _root
->parent
->child
[i
] && //There is a child at i.
3071 _root
->parent
->child
[i
] == _root
&& //The child is identical with this node.
3072 i
< (n
-1) //This is not the last child.
3075 for(++i
; i
<n
; ++i
) //For each following child.
3077 if(_root
->parent
->child
[i
]) //There is a child at i.
3079 moveto_sibling(i
); //Move to it.
3080 return true; //Success.
3085 return false; //Failure.
3088 //<summary>Compile the absolute node path from root as a text string.</summary>
3089 //<param name="delimiter">Delimiter string to insert between element names.</param>
3090 //<returns>Path string (e.g. with '/' as delimiter, '/document/.../this'.</returns>
3091 std::string
path(const TCHAR
* delimiter
= _T("/"))
3093 TCHAR
* path
= NULL
; //Current path.
3094 TCHAR
* temp
; //Temporary pointer.
3095 xml_node cursor
= *this; //Make a copy.
3096 #ifdef PUGOPT_NONSEG
3097 unsigned int destlen
= 0;
3098 strcatgrown_impl(&path
,cursor
.name(),destlen
,cursor
.name_size()); //Get this name.
3100 strcatgrow(&path
,cursor
.name()); //Get this name.
3102 while(cursor
.moveto_parent() && !cursor
.type_document()) //Loop to parent (stopping on actual root because it has no name).
3104 temp
= NULL
; //Mark as null so 'strcatgrow' will allocate memory.
3105 #ifdef PUGOPT_NONSEG
3107 strcatgrown_impl(&temp
,cursor
.name(),destlen
,cursor
.name_size()); //Append next element name.
3109 strcatgrow(&temp
,cursor
.name()); //Append next element name.
3111 strcatgrow(&temp
,delimiter
); //Append delimiter.
3112 strcatgrow(&temp
,path
); //Append current path.
3113 free(path
); //Free the old path.
3114 path
= temp
; //Set path as new string.
3117 strcatgrow(&temp
,delimiter
); //Prepend final delimiter.
3118 strcatgrow(&temp
,path
); //Append current path.
3119 free(path
); //Free the old path.
3120 std::string returns
= temp
; //Set path as new string.
3122 return returns
; //Return the path;
3125 //<summary>Search for a node by path.</summary>
3126 //<param name="path">
3127 // Path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative
3128 // to root), '../foo/bar' (pop relative position).
3130 //<param name="delimiter">Delimiter string to use in tokenizing path.</param>
3131 //<returns>Matching node, or xml_node(NULL) if not found.</returns>
3132 xml_node
first_element_by_path(const std::string
& path
,const std::string
& delimiter
= _T("/")){ return first_element_by_path(path
.c_str(),delimiter
.c_str()); }
3134 //<summary>Search for a node by path.</summary>
3135 //<param name="path">
3136 // Path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative
3137 // to root), '../foo/bar' (pop relative to position).
3139 //<param name="delimiter">Delimiter string to use in tokenizing path.</param>
3140 //<returns>Matching node, or xml_node(NULL) if not found.</returns>
3141 //<remarks>To-do: Support XPath-style queries.</remarks>
3142 xml_node
first_element_by_path(const TCHAR
* path
,const TCHAR
* delimiter
= _T("/"))
3144 if(!path
) return xml_node();
3146 pointer_array path_segments
; //Array of path segments.
3147 xml_node found
= *this; //Current search context.
3148 strcatgrow(&temp
,path
);
3149 TCHAR
* name
= _tcstok(temp
,delimiter
);
3150 while(name
) //Tokenize the whole path.
3152 path_segments
.push_back((void*)name
); //push_back it to array.
3153 name
= _tcstok(NULL
,delimiter
); //Get the next token,
3155 register unsigned int n
= path_segments
.size();
3156 if(n
== 0) return xml_node(); //Return null node if no path segments.
3157 if(path
[0]==delimiter
[0]) found
.moveto_root(); //Absolute path; e.g. '/foo/bar'
3158 for(register unsigned int i
= 0; i
<n
; ++i
) //For each path segment.
3160 name
= (TCHAR
*)path_segments
.at(i
);
3163 if(*name
==_T('.')) //Is '.' or '..'
3165 if(_tcscmp(name
,_T(".."))==0) found
.moveto_parent(); //Pop.
3166 else continue; //Ignore '.' since it is redundant if path is './path'.
3170 register unsigned int j
, m
= found
.children(); //For each child.
3173 if(found
.child(j
).has_name(name
)) //Name matches?
3175 found
= found
.child(j
); //Move to this child.
3176 goto NEXT_ELEM
; //Search next path segment.
3179 if(found
.moveto_next_sibling(found
.name())) //Find next sibling having same name.
3181 if(i
> 0) --i
; //Try the previous path segment.
3184 else //Move to parent to search further.
3186 if(!found
.type_document() && found
.moveto_parent() && !found
.type_document()) //Not root and stepped to parent and parent is not root.
3188 if(i
> 0) --i
; //Try the previous path segment.
3189 if(found
.moveto_next_sibling(found
.name())) //Try to find next sibling having same name.
3191 if(i
> 0) --i
; //Try the previous path segment.
3199 if(found
.type_document()) //Can't move up any higher, so fail.
3201 free(temp
); //Got to free this.
3202 return xml_node(); //Return null node.
3205 free(temp
); //Got to free this.
3206 return found
; //Return the matching node.
3209 //<summary>Recursively traverse the tree.</summary>
3210 //<param name="walker">Reference to tree walker derived from xml_tree_walker.</param>
3211 //<returns>True if traversal was not halted by xml_tree_walker::for_each() callback.</returns>
3212 bool traverse(xml_tree_walker
& walker
)
3214 if(walker
.depth() == 0 && !walker
.begin(*this)) return false; //Send the callback for begin traverse if depth is zero.
3215 if(!empty()) //Don't traveres if this is a null node.
3217 walker
.push(); //Increment the walker depth counter.
3218 register unsigned int n
= _root
->children
; //For each child.
3219 for(register unsigned int i
=0; i
<n
; ++i
)
3221 if(_root
->child
[i
]) //There is a child at i.
3223 xml_node
subsearch(_root
->child
[i
]); //Wrap it.
3224 if(!(walker
.for_each(subsearch
) && subsearch
.traverse(walker
)))
3225 return false; //Traversal was aborted.
3228 walker
.pop(); //Decrement the walker depth counter.
3230 if(walker
.depth() == 0 && !walker
.end(*this)) return false; //Send the callback for end traverse if depth is zero.
3237 //<summary>Set element name.</summary>
3238 //<param name="new_name">New element name.</param>
3239 //<returns>Success.</returns>
3240 bool name(const std::string
& new_name
){ return name(new_name
.c_str()); }
3242 //<summary>Set element name.</summary>
3243 //<param name="new_name">New element name.</param>
3244 //<returns>Success.</returns>
3245 bool name(const TCHAR
* new_name
)
3247 if((type_element() || type_pi()) && new_name
)
3248 #ifdef PUGOPT_NONSEG
3249 return strcpyinsitu(&_root
->name
,new_name
,&_root
->name_insitu
,_root
->name_size
);
3251 return strcpyinsitu(&_root
->name
,new_name
,&_root
->name_insitu
);
3256 //<summary>Set node data.</summary>
3257 //<param name="value">New data (PCDATA, CDATA, or comment) value.</param>
3258 //<returns>Success.</returns>
3259 bool value(const std::string
& new_value
){ return value(new_value
.c_str()); }
3261 //<summary>Set node data.</summary>
3262 //<param name="value">New data (PCDATA, CDATA, or comment) value.</param>
3263 //<returns>Success.</returns>
3264 bool value(const TCHAR
* new_value
)
3266 if((type_pcdata() || type_cdata() || type_comment()) && new_value
)
3267 #ifdef PUGOPT_NONSEG
3268 return strcpyinsitu(&_root
->value
,new_value
,&_root
->value_insitu
,_root
->value_size
);
3270 return strcpyinsitu(&_root
->value
,new_value
,&_root
->value_insitu
);
3275 //<summary>Remove attribute at the given subscript.</summary>
3276 //<param name="i">Subscript.</param>
3277 //<returns>Success.</returns>
3278 bool remove_attribute(unsigned int i
)
3280 unsigned int n
= _root
->attributes
;
3283 xml_attribute_struct
* temp
= _root
->attribute
[i
];
3285 for(unsigned int j
=i
; j
<n
; ++j
)
3286 _root
->attribute
[j
] = _root
->attribute
[j
+1];
3287 _root
->attribute
[n
] = NULL
;
3288 if(!temp
->name_insitu
) free(temp
->name
);
3289 if(!temp
->value_insitu
) free(temp
->value
);
3291 --_root
->attributes
;
3297 //<summary>Remove attribute having the given name.</summary>
3298 //<param name="name">Name of attribute to delete.</param>
3299 //<returns>Success.</returns>
3300 bool remove_attribute(const std::string
& name
){ return remove_attribute(name
.c_str()); }
3302 //<summary>Remove attribute having the given name.</summary>
3303 //<param name="name">Name of attribute to delete.</param>
3304 //<returns>Success.</returns>
3305 bool remove_attribute(const TCHAR
* name
)
3307 int i
= mapto_attribute_idx(name
);
3308 if(i
> -1) return remove_attribute((unsigned int)i
);
3312 //<summary>Append a new attribute to the node list of attributes.</summary>
3313 //<param name="name">Name.</param>
3314 //<param name="value">Value thereof.</param>
3315 //<returns>Attribute structure wrapper.</returns>
3316 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3317 xml_attribute
append_attribute(const std::string
& name
,const std::string
& value
){ return append_attribute(name
.c_str(),value
.c_str()); }
3319 //<summary>Append a new attribute to the node list of attributes.</summary>
3320 //<param name="name">Name.</param>
3321 //<param name="value">Value thereof.</param>
3322 //<returns>Attribute structure wrapper.</returns>
3323 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3324 xml_attribute
append_attribute(const TCHAR
* name
,const TCHAR
* value
)
3326 if(!name
|| !value
) return xml_attribute(); //We must have both to proceed.
3327 xml_attribute_struct
* p
= pug::append_attribute(_root
,1); //Append/allocate a new attribute structure.
3328 if(p
) //If append/allocate succeeded.
3330 #ifdef PUGOPT_NONSEG
3331 strcatgrown(&p
->name
,name
,p
->name_size
); //Append the name.
3332 strcatgrown(&p
->value
,value
,p
->value_size
); //Append the name.
3334 strcatgrow(&p
->name
,name
); //Append the name.
3335 strcatgrow(&p
->value
,value
); //Append the name.
3337 p
->name_insitu
= p
->value_insitu
= false; //Mark as not part of original parse string.
3338 return xml_attribute(p
); //Success.
3340 return xml_attribute(); //Failure; return an empty.
3343 //<summary>Append a new attribute of type long to the node list of attributes.</summary>
3344 //<param name="name">Name.</param>
3345 //<param name="value">Value thereof.</param>
3346 //<returns>Attribute structure wrapper.</returns>
3347 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3348 xml_attribute
append_attribute(const TCHAR
* name
,long value
)
3350 if(!name
) return false;
3351 TCHAR temp
[32] = {0};
3352 _stprintf(temp
,_T("%ld"),value
);
3353 return append_attribute(name
,temp
);
3356 //<summary>Append a new attribute of type double to the node list of attributes.</summary>
3357 //<param name="name">Name.</param>
3358 //<param name="value">Value thereof.</param>
3359 //<returns>Attribute structure wrapper.</returns>
3360 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3361 xml_attribute
append_attribute(const TCHAR
* name
,double value
)
3363 if(!name
) return false;
3364 TCHAR temp
[32] = {0};
3365 _stprintf(temp
,_T("%lf"),value
);
3366 return append_attribute(name
,temp
);
3369 //<summary>Append a new attribute of type bool to the node list of attributes.</summary>
3370 //<param name="name">Name.</param>
3371 //<param name="value">Value thereof.</param>
3372 //<returns>Attribute structure wrapper.</returns>
3373 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3374 xml_attribute
append_attribute(const TCHAR
* name
,bool value
)
3376 if(!name
) return false;
3377 return append_attribute(name
,((value
)?_T("true"):_T("false")));
3380 //<summary>Set the current node entity type.</summary>
3381 //<param name="new_type">New type to set.</param>
3382 //<returns>Previous type.</returns>
3383 //<remarks>If has children and now is not node_element, children are obscured.</remarks>
3384 xml_node_type
type(xml_node_type new_type
)
3386 xml_node_type prev
= _root
->type
; //Save old type.
3387 _root
->type
= new_type
; //Set new type.
3388 return prev
; //Return old type.
3392 // Allocate & append a child node of the given type at the end of the
3393 // current node array of children.
3395 //<param name="type">New child node type.</param>
3396 //<returns>xml_node wrapping the new child.</returns>
3397 //<remarks>Pointer space may be grown. An xml_node_struct structure is allocated.</remarks>
3398 xml_node
append_child(xml_node_type type
)
3400 if(type_document()||type_element()) //Don't do anything if not an node_element or root.
3402 xml_node_struct
* p
= pug::append_node(_root
,1,type
); //Append the node.
3405 p
->name_insitu
= p
->value_insitu
= false;
3406 return xml_node(p
); //If we have it, return wrapped.
3409 return xml_node(); //Return dummy.
3412 //<summary>Allocate & insert a child node of the given type at subscript.</summary>
3413 //<param name="i">Subscript at which to insert.</param>
3414 //<param name="type">New child node type.</param>
3415 //<returns>xml_node wrapping the new child.</returns>
3417 // Pointer space may be grown. An xml_node_struct structure is allocated,
3418 // and existing children are shifted in their array position.
3420 xml_node
insert_child(unsigned int i
,xml_node_type type
)
3422 if(!type_element()) return xml_node(); //Don't do anything if not an node_element.
3423 unsigned int n
= _root
->children
; //Get count of existing children.
3424 if(type_element() && i
>= n
) return append_child(type
); //If subscript at end of array then just append.
3425 else if(type_element() && i
< n
)
3427 xml_node_struct
* p
= pug::append_node(_root
,1,type
); //Append the new node (by default at last array position).
3428 if(p
) //Ensure we have it.
3430 register int m
= (i
-1); //Stop at i.
3431 for(register int j
=(n
-1); j
>m
; --j
) //Starting at one less than end of array, reverse loop to i.
3432 _root
->child
[j
+1] = _root
->child
[j
]; //Shift node to right.
3433 _root
->child
[i
] = p
; //Set node at subscript to new node.
3434 return xml_node(p
); //Return new node.
3437 return xml_node(); //Return dummy.
3440 //<summary>Delete the child node at the given subscript.</summary>
3441 //<param name="i">Subscript.</param>
3442 //<returns>Success.</returns>
3443 //<remarks>Shifts child array element positions. Frees entire tree under child to be deleted.</remarks>
3444 bool remove_child(unsigned int i
)
3446 unsigned int n
= _root
->children
;
3447 if(i
< n
) //Ensure subscript is in bounds.
3449 xml_node_struct
* p
= _root
->child
[i
]; //Keep a pointer to this node so we can free it.
3452 for(j
=i
; j
<n
; ++j
) //Shift everything left from this point on.
3453 _root
->child
[j
] = _root
->child
[j
+1];
3454 _root
->child
[j
] = NULL
; //Mark the last element null.
3455 --_root
->children
; //One less children.
3456 p
->parent
= p
; //This ensures we only free this node when calling 'free_node'.
3457 pug::free_node(p
); //Free the node tree.
3458 return true; //Success.
3460 return false; //Failure.
3463 //Stream/Output Helpers
3467 // Stream output. Recursively writes the internal xml_node_struct structure
3468 // to the given stream.
3470 //<param name="os">Reference to output stream.</param>
3471 //<param name="indent_char">Char to use for indent.</param>
3472 //<param name="breaks">Use linebreaks?</param>
3473 //<remarks>String data is written to stream.</remarks>
3474 void outer_xml(std::basic_ostream
<TCHAR
,std::char_traits
<TCHAR
> >& os
,TCHAR indent_char
= _T('\t'),bool breaks
= true)
3476 if(empty()) return; //Make sure there is something to output.
3477 indent_stack
indent(indent_char
); //Prepare the indent.
3478 if(type_document()) //If this is the root, we don't want to output the root itself.
3480 register unsigned int n
= _root
->children
; //Output each child of the root.
3481 for(register unsigned int i
=0; i
<n
; ++i
)
3482 pug::outer_xml(os
,indent
,_root
->child
[i
],breaks
);
3484 else pug::outer_xml(os
,indent
,_root
,breaks
); //Output the node.
3488 // Stream output operator. Wraps 'outer_xml'. Recursively writes
3489 // the given node to the given stream.
3491 //<param name="os">Reference to output stream.</param>
3492 //<param name="xml_node">Reference to tree node.</param>
3493 //<returns>Reference to output stream.</returns>
3494 //<remarks>String data is written to stream.</remarks>
3495 friend std::basic_ostream
<TCHAR
,std::char_traits
<TCHAR
> >& operator<<(std::basic_ostream
<TCHAR
,std::char_traits
<TCHAR
> >& os
,xml_node node
)
3497 if(!os
.good()) return os
;
3498 if((os
.flags()|std::ostream::skipws
) == std::ostream::skipws
)
3499 node
.outer_xml(os
,0,false); //Skipping whitespace; suppress indents & linebreaks.
3500 else node
.outer_xml(os
); //Default options.
3506 //<summary>Provides a high-level interface to the XML parser.</summary>
3509 //Internal Data Members
3512 xml_node_struct
* _xmldoc
; //Pointer to current XML document tree root.
3513 long _growby
; //Attribute & child pointer space growth increment.
3514 bool _autdel
; //Delete the tree on destruct?
3515 TCHAR
* _buffer
; //Pointer to in-memory buffer (for 'parse_file').
3516 TCHAR
* _strpos
; //Where parsing left off (for 'parse_file').
3517 unsigned long _optmsk
; //Parser options.
3518 #ifdef PUGOPT_MEMFIL
3519 HANDLE _mmfile
; //File handle.
3520 HANDLE _mmfmap
; //Handle which maps the file.
3521 void* _mmaddr
; //Base address of map.
3522 size_t _mfsize
; //Size of memory-mapped file.
3523 bool _addeos
; //True if we had to add a 0 to then end of the file.
3526 //Construction/Destruction
3529 //<summary>Constructor.</summary>
3530 //<param name="optmsk">Options mask.</param>
3531 //<param name="autdel">Delete tree on destruct?</param>
3532 //<param name="growby">Parser pointer space growth increment.</param>
3533 //<remarks>Root node structure is allocated.</remarks>
3534 xml_parser(unsigned long optmsk
= parse_default
,bool autdel
= true,long growby
= parse_grow
):
3541 #ifdef PUGOPT_MEMFIL
3552 //<summary>Direct parse constructor.</summary>
3553 //<param name="xmlstr">
3554 // XML-formatted string to parse. Note: String must persist for the
3555 // life of the tree. String is zero-segmented, but not freed.
3557 //<param name="optmsk">Parser options.</param>
3558 //<param name="autdel">Delete tree on destruct?</param>
3559 //<param name="growby">Parser pointer space growth increment.</param>
3560 //<remarks>Root node structure is allocated, string is parsed & tree may be grown.</remarks>
3561 xml_parser(TCHAR
* xmlstr
,unsigned long optmsk
= parse_default
,bool autdel
= true,long growby
= parse_grow
) :
3568 #ifdef PUGOPT_MEMFIL
3577 parse(xmlstr
,_optmsk
); //Parse it.
3580 //<summary>Destructor.</summary>
3581 //<remarks>Tree memory and string memory may be freed.</remarks>
3582 virtual ~xml_parser()
3584 if(_autdel
&& _xmldoc
) free_node(_xmldoc
);
3585 if(_buffer
) free(_buffer
);
3586 #ifdef PUGOPT_MEMFIL
3591 //Accessors/Operators
3594 operator xml_node_struct
*() { return _xmldoc
; } //Cast as xml_node_struct pointer to root.
3595 operator xml_node() { return xml_node(_xmldoc
); } //Cast as xml_node (same as document).
3596 xml_node
document(){ return xml_node(_xmldoc
); } //Returns the root wrapped by an xml_node.
3601 //<summary>Allocate a new, empty root.</summary>
3602 //<remarks>Tree memory and string memory may be freed.</remarks>
3605 clear(); //Free any allocated memory.
3606 _xmldoc
= new_node(node_document
); //Allocate a new root.
3607 _xmldoc
->parent
= _xmldoc
; //Point to self.
3610 //<summary>Clear any existing tree or string.</summary>
3611 //<remarks>Tree memory and string memory may be freed.</remarks>
3614 if(_xmldoc
){ free_node(_xmldoc
); _xmldoc
= 0; }
3615 if(_buffer
){ free(_buffer
); _buffer
= 0; }
3616 #ifdef PUGOPT_MEMFIL
3621 #ifdef PUGOPT_MEMFIL
3623 //Memory-Mapped File Support
3626 //<summary>Closes any existing memory-mapped file.</summary>
3627 void close_memfile()
3631 UnmapViewOfFile(_mmaddr
);
3636 CloseHandle(_mmfmap
);
3641 if(_addeos
) //Remove the 0 we added to the end of the file.
3643 SetFilePointer(_mmfile
,_mfsize
,NULL
,FILE_BEGIN
);
3644 SetEndOfFile(_mmfile
);
3647 CloseHandle(_mmfile
);
3657 //<summary>Attach an externally-generated root to the parser.</summary>
3658 //<param name="root">Pointer to node structure.</param>
3659 //<returns>Pointer to old root if any.</returns>
3660 //<remarks>New root may be deleted on dtor if autodelete set.</remarks>
3661 xml_node_struct
* attach(xml_node_struct
* root
)
3663 xml_node_struct
* t
= _xmldoc
; //Save this root.
3664 _xmldoc
= root
; //Assign.
3665 _xmldoc
->parent
= _xmldoc
; //Ensure we are the root.
3666 return t
; //Return the old root if any.
3669 //<summary>Detach the current root from the parser.</summary>
3670 //<returns>Pointer to old root, if any.</returns>
3671 xml_node_struct
* detach()
3673 xml_node_struct
* t
= _xmldoc
; //Save this root.
3674 _xmldoc
= 0; //So we don't delete later on if autodelete set.
3675 return t
; //Return the old root if any.
3678 //<summary>Get parser optsions mask.</summary>
3679 //<returns>Options mask.</returns>
3680 unsigned long options(){ return _optmsk
; }
3682 //<summary>Set parser options mask.</summary>
3683 //<param name="optmsk">Options mask to set.</param>
3684 //<returns>Old options mask.</returns>
//The current mask is captured in 'prev' before anything else happens.
3685 unsigned long options(unsigned long optmsk
)
3687 unsigned long prev
= _optmsk
;
//NOTE(review): the remainder of the body is not visible in this excerpt; presumably it stores
//optmsk in _optmsk and returns prev, as the summary above describes - confirm.
3692 //<summary>Get pointer space growth size increment.</summary>
3693 //<returns>Grow size.</returns>
3694 unsigned long growby(){ return _growby
; }
3696 //<summary>Set pointer space growth size increment.</summary>
3697 //<param name="grow">Grow size to set.</param>
3698 //<returns>Old size.</returns>
//The current increment is captured in 'prev' before anything else happens.
//NOTE(review): the parameter and 'prev' are signed long while the return type is unsigned long,
//so a negative stored value would be returned as a large unsigned number - confirm intent.
3699 unsigned long growby(long grow
)
3701 long prev
= _growby
;
//NOTE(review): the remainder of the body is not visible in this excerpt; presumably it stores
//grow in _growby and returns prev, as the summary above describes - confirm.
3706 //<summary>Get parse file buffer last string position.</summary>
3707 //<returns>Last string position.</returns>
3709 // Use after parse_file, with parse_dtd_only set in order to recommence
3710 // parse of document body.
3720 //<summary>Parse the given XML string in-situ.</summary>
3721 //<param name="s">Pointer to XML-formatted string.</param>
3722 //<param name="optmsk">Parser options mask.</param>
3723 //<returns>Last string position or null.</returns>
3724 //<remarks>Input string is zero-segmented.</remarks>
//Passing parse_noset (the default) keeps the mask already stored in _optmsk; any other value
//replaces it, and the replacement stays in effect for later calls.
//The previous tree and buffer are released before the new document root is created.
//NOTE(review): the embedded numbering suggests a line between the signature and clear() that
//is not visible in this excerpt.
3725 TCHAR
* parse(TCHAR
* s
,unsigned long optmsk
= parse_noset
)
3728 clear(); //Free any allocated memory.
3729 _xmldoc
= new_node(node_document
); //Allocate a new root.
3730 _xmldoc
->parent
= _xmldoc
; //Point to self.
3731 if(optmsk
!= parse_noset
) _optmsk
= optmsk
;
3732 return pug::parse(s
,_xmldoc
,_growby
,_optmsk
); //Parse the input string.
3736 //<summary>Load into memory and parse the contents of the file at the given path.</summary>
3737 //<param name="path">File path.</param>
3738 //<param name="optmsk">Parser options.</param>
3739 //<returns>Success if the file was loaded.</returns>
3741 // The file contents is loaded and stored in the member '_buffer' until
3742 // freed by calling 'parse', 'parse_file', 'clear' or '~xml_parser'.
//A null path is rejected before any existing data is touched.
//The options mask is updated before the load is attempted, so a new mask is kept even when
//loading fails. A document root is only created when load_file() succeeds and reports more
//than zero bytes.
//NOTE(review): the handling of the pug::parse() result and the return statements are not
//visible in this excerpt.
3744 bool parse_file(const TCHAR* path,unsigned long optmsk = parse_noset)
3746 if(!path) return false;
3747 clear(); //clear any existing data.
3748 unsigned long bytes;
3749 if(optmsk != parse_noset) _optmsk = optmsk;
3750 if(load_file(path,&_buffer,&bytes) && bytes > 0)
3752 _xmldoc = pug::new_node(node_document);
3753 _xmldoc->parent = _xmldoc; //Point to self.
3754 TCHAR* s = pug::parse(_buffer,_xmldoc,_growby,_optmsk);
3762 #ifdef PUGOPT_MEMFIL
3764 //<summary>Parse the contents of the file at the given path, using a memory-mapped file.</summary>
3765 //<param name="path">File path.</param>
3766 //<param name="optmsk">Parser options.</param>
3768 // True (1) if the file was parsed successfully, false (0) if open failed,
3769 // and -1 if an exception occurred.
3772 // The file contents are available until closed by calling 'parse',
3773 // 'parse_file', 'clear' or '~xml_parser'.
//The file is opened read-only unless parse_dtd or parse_dtd_only is requested.
//NOTE(review): the assert and the 'readonly' computation test the optmsk argument, while the
//parse itself uses _optmsk. When the caller passes the default parse_noset, these checks look at
//the sentinel value rather than the mask actually in effect - confirm this is intended.
//NOTE(review): _mfsize is assigned from GetFileSize() in open_mmfile but is used here as an
//offset on a TCHAR pointer; the two agree only if TCHAR is one byte wide - confirm.
//NOTE(review): the lines that combine the terminator tests into a condition, and everything
//after the pug::parse() call, are not visible in this excerpt.
3775 int parse_mmfile(const TCHAR
* path
,unsigned long optmsk
= parse_noset
)
3780 clear(); //Clear any existing data.
3781 if(optmsk
!= parse_noset
) _optmsk
= optmsk
;
3782 assert((optmsk
& parse_wnorm
) == 0); //Normalization isn't implemented for memory-mapped files, as of 23 Jan 2003.
3783 const bool readonly
= (optmsk
& (parse_dtd
|parse_dtd_only
)) == 0;
3784 if(open_mmfile(path
,readonly
,false))
3786 //If the file has a 0 at the end we are ok to proceed, otherwise add one.
3790 *(((TCHAR
*)_mmaddr
) + _mfsize
) == 0
3794 *(((TCHAR
*)_mmaddr
) + _mfsize
- 1) == 0
3798 open_mmfile(path
,false,true) //Re-open and add 0 at EOF.
3803 _xmldoc
= new_node(node_document
);
3804 _xmldoc
->parent
= _xmldoc
; //Point to self.
3805 TCHAR
* s
= pug::parse((TCHAR
*)_mmaddr
,_xmldoc
,_growby
,_optmsk
);
3822 //<summary>Opens the specified memory-mapped file.</summary>
3823 //<param name="path">File path.</param>
3824 //<param name="readonly">True to open the file for read-only access.</param>
3825 //<param name="addeos">True to add a 0 to the end of the file.</param>
3826 //<returns>Success if the file was opened.</returns>
//Sequence: clear existing state, open the file, record its size in _mfsize, create the mapping
//(one TCHAR larger when addeos is set), map a view of the whole mapping, and optionally write
//the terminating 0 at offset _mfsize. The return value reports whether a view address was obtained.
//NOTE(review): the assert below tests '_mmfile == NULL' twice and never tests _mmfmap;
//the second test was presumably meant to be '_mmfmap == NULL' - confirm.
//NOTE(review): GetFileSize() is called without a high-order size pointer, so only the low
//32 bits of the file size are captured - confirm large files are out of scope.
//NOTE(review): the conditions guarding the mapping, the terminator write and the failure
//cleanup are not visible in this excerpt.
3827 bool open_mmfile(const TCHAR
* path
,const bool readonly
,const bool addeos
)
3829 clear(); //Close any existing MMF and clear any existing data.
3830 assert(_mmfile
== NULL
&& _mmfile
== NULL
&& _mmaddr
== NULL
);
3832 _mmfile
= CreateFile(path
,readonly
?GENERIC_READ
:GENERIC_READ
|GENERIC_WRITE
,0,NULL
,OPEN_EXISTING
,FILE_ATTRIBUTE_NORMAL
,NULL
); //Open read-only or read/write, no share, no security attrs, existing file only, normal attrs, no template.
3833 if(_mmfile
!= INVALID_HANDLE_VALUE
)
3835 _mfsize
= ::GetFileSize(_mmfile
,NULL
);
3836 _mmfmap
= CreateFileMapping(_mmfile
,NULL
,readonly
?PAGE_READONLY
:PAGE_READWRITE
,0,_mfsize
+(addeos
?sizeof(TCHAR
):0),NULL
); //Create map: handle, no security attr, read|read/write, larger if addeos, anonymous.
3839 assert(_mmaddr
== NULL
);
3840 _mmaddr
= MapViewOfFile(_mmfmap
,readonly
?FILE_MAP_READ
:FILE_MAP_WRITE
,0,0,0); //Map the view: handle, read|read/write, start at beginning, map entire file.
3843 if(addeos
) //Add a terminating 0 to the end of the file for 'parse()'.
3846 *(((TCHAR
*)_mmaddr
) + _mfsize
) = 0;
//Failure cleanup: handles are closed and zeroed so a later call starts from a clean state.
3852 CloseHandle(_mmfmap
);
3853 CloseHandle(_mmfile
);
3854 _mmfile
= _mmfmap
= 0;
3859 CloseHandle(_mmfile
);
3863 return (_mmaddr
!= NULL
);
3871 //<summary>An array of nodes, used by xml_node queries.</summary>
//Thin layer over pointer_array: stored pointers are cast to xml_node_struct* and wrapped in
//xml_node on access.
3872 class xml_node_list
: public pointer_array
//The growth increment is forwarded to pointer_array and defaults to 4.
3875 xml_node_list(unsigned int grow
= 4) : pointer_array(grow
) { }
3876 virtual ~xml_node_list(){ }
//at() and operator[] are identical. The signed index is cast to unsigned int before it reaches
//pointer_array::at, so a negative index arrives there as a large unsigned value.
3878 xml_node
at(long i
){ return xml_node((xml_node_struct
*)pointer_array::at((unsigned int)i
)); } //Access xml_node at subscript.
3879 xml_node
operator[](long i
){ return xml_node((xml_node_struct
*)pointer_array::at((unsigned int)i
)); } //Access xml_node at subscript.
//Streams every node in index order; nothing is written if the stream is not in a good state.
//NOTE(review): the end of this function and of the class is not visible in this excerpt.
3880 friend std::ostream
& operator<<(std::ostream
& os
,xml_node_list
& list
) //Output helper.
3882 if(!os
.good()) return os
;
3883 unsigned int n
= list
.size();
3884 for(unsigned int i
=0; i
<n
; ++i
) os
<< list
[i
];
3892 // Undefine these horrible macros
//Option, version and size macros are removed at the end of the header so they do not stay
//defined for code that includes it.
3893 #undef PUGOPT_MEMFIL
3894 #undef PUGOPT_NONSEG
3895 #undef PUGAPI_INTERNAL_VARIANT
3896 #undef PUGAPI_INTERNAL_VERSION_MAJOR
3897 #undef PUGAPI_INTERNAL_VERSION_MINOR
3898 #undef PUGAPI_INTERNAL_VERSION
3899 #undef PUGDEF_ATTR_NAME_SIZE
3900 #undef PUGDEF_ATTR_VALU_SIZE
3901 #undef PUGDEF_ELEM_NAME_SIZE
//UNDEF_LOHIWORD and UNDEF_TCHAR_AND_REST are marker macros set near the top of this header
//next to the HIWORD/LOWORD and _tcs*/_ist* definitions.
//NOTE(review): the #undef lines these markers guard are not visible in this excerpt;
//presumably they remove exactly those definitions - confirm.
3909 #ifdef UNDEF_LOHIWORD
3912 #undef UNDEF_LOHIWORD
3915 #ifdef UNDEF_TCHAR_AND_REST
3927 #undef UNDEF_TCHAR_AND_REST