fix doc example typo
[boost.git] / boost / property_tree / detail / pugxml.hpp
blob81b281b7d0c627372ee05060c39c507a087dfc36
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // Pug XML Parser - Version 1.0002
4 // --------------------------------------------------------
5 // Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
6 // Released into the Public Domain. Use at your own risk.
7 // See pugxml.xml for further information, history, etc.
8 // Contributions by Neville Franks (readonly@getsoft.com).
9 //
10 // Modified to suit boost::property_tree library by Marcin Kalicinski
12 #ifndef BOOST_PROPERTY_TREE_DETAIL_PUGXML_HPP_INCLUDED
13 #define BOOST_PROPERTY_TREE_DETAIL_PUGXML_HPP_INCLUDED
15 #ifndef TCHAR
16 #define UNDEF_TCHAR_AND_REST
17 #define TCHAR char
18 #define _tcslen strlen
19 #define _istalnum isalnum
20 #define _tcsncpy strncpy
21 #define _tcscpy strcpy
22 #define _tcscmp strcmp
23 #define _tcstol strtol
24 #define _tcstod strtod
25 #define _tcstok strtok
26 #define _stprintf sprintf
27 #define _T(s) s
28 #endif
30 //#define PUGOPT_MEMFIL //Uncomment to enable memory-mapped file parsing support.
31 //#define PUGOPT_NONSEG //Uncomment to enable non-destructive (non-segmenting) parsing support.
33 #ifdef PUGOPT_MEMFIL
34 # ifndef PUGOPT_NONSEG
35 # define PUGOPT_NONSEG //PUGOPT_MEMFIL implies PUGOPT_NONSEG.
36 # endif
37 #endif
39 #include <iostream>
40 #include <ostream>
41 #include <string>
42 #include <cstring>
43 #if defined(PUGOPT_MEMFIL) | defined(PUGOPT_NONSEG)
44 # include <assert.h>
45 #endif
47 #ifndef HIWORD
48 # define UNDEF_LOHIWORD
49 # define HIWORD(X) ((unsigned short)((unsigned long)(X)>>16))
50 # define LOWORD(X) ((unsigned short)((unsigned long)(X)&0xFFFF))
51 #endif
53 //<summary>
54 // Library variant ID. The ID 0x58475550 is owned by Kristen Wegner. You *MUST*
55 // provide your own unique ID if you modify or fork the code in this library to
56 // your own purposes. If you change this then *you* are now the maintainer, not me.
57 // Change also in the package section of pugxml.xml, and append yourself to the
58 // authors section.
59 //</summary>
60 #define PUGAPI_INTERNAL_VARIANT 0xdeadbeef
61 //<summary>Major version. Increment for each major release. Only change if you own the variant.</summary>
62 #define PUGAPI_INTERNAL_VERSION_MAJOR 1
63 //<summary>Minor version. Increment for each minor release. Only change if you own the variant ID.</summary>
64 #define PUGAPI_INTERNAL_VERSION_MINOR 2
66 #define PUGAPI_INTERNAL_VERSION ((PUGAPI_INTERNAL_VERSION_MINOR&0xFFFF)|PUGAPI_INTERNAL_VERSION_MAJOR<<16)
68 #define PUGDEF_ATTR_NAME_SIZE 128
69 #define PUGDEF_ATTR_VALU_SIZE 256
70 #define PUGDEF_ELEM_NAME_SIZE 256
72 //<summary>The PugXML Parser namespace.</summary>
73 namespace boost { namespace property_tree { namespace xml_parser { namespace pug
76 //<summary>The Library Variant ID. See PUGAPI_INTERNAL_VARIANT for an explanation.</summary>
77 //<returns>The current Library Variant ID.</returns>
78 inline static unsigned long lib_variant(){ return PUGAPI_INTERNAL_VARIANT; }
79 //<summary>The library version. High word is major version. Low word is minor version.</summary>
80 //<returns>The current Library Version.</returns>
81 inline static unsigned long lib_version(){ return PUGAPI_INTERNAL_VERSION; }
84 //<summary>A 'name=value' XML attribute structure.</summary>
85 typedef struct t_xml_attribute_struct
87 TCHAR* name; //Pointer to attribute name.
88 bool name_insitu; //True if 'name' is a segment of the original parse string.
89 #ifdef PUGOPT_NONSEG
90 unsigned int name_size; //Length of element name.
91 #endif
92 TCHAR* value; //Pointer to attribute value.
93 bool value_insitu; //True if 'value' is a segment of the original parse string.
94 #ifdef PUGOPT_NONSEG
95 unsigned int value_size; //Length of element name.
96 #endif
98 xml_attribute_struct;
//<summary>Classification of a tree node.</summary>
//<remarks>Stored in 'xml_node_struct::type'. Values are consecutive, starting at zero.</remarks>
enum t_xml_node_type
{
    node_null = 0,      //An undifferentiated entity.
    node_document,      //The absolute root of a document tree.
    node_element,       //E.g. '<...>'
    node_pcdata,        //E.g. '>...<'
    node_cdata,         //E.g. '<![CDATA[...]]>'
    node_comment,       //E.g. '<!--...-->'
    node_pi,            //E.g. '<?...?>'
    node_include,       //E.g. '<![INCLUDE[...]]>'
    node_doctype,       //E.g. '<!DOCTYPE ...>'
    node_dtd_entity,    //E.g. '<!ENTITY ...>'
    node_dtd_attlist,   //E.g. '<!ATTLIST ...>'
    node_dtd_element,   //E.g. '<!ELEMENT ...>'
    node_dtd_notation   //E.g. '<!NOTATION ...>'
};
typedef t_xml_node_type xml_node_type;
//Default growth increment for the child and attribute pointer arrays.
static const unsigned long parse_grow = 4;


//Parser option flags; combine with '|' and pass as the 'optmsk' argument of 'parse'.
static const unsigned long parse_minimal        = 0UL;          //None of the flags below.
static const unsigned long parse_pi             = 1UL << 1;     //Parse '<?...?>'.
static const unsigned long parse_doctype        = 1UL << 2;     //Parse the '<!DOCTYPE ...>' section, setting '[...]' as data member.
static const unsigned long parse_comments       = 1UL << 3;     //Parse '<!--...-->'.
static const unsigned long parse_cdata          = 1UL << 4;     //Parse '<![CDATA[...]]>' and/or '<![INCLUDE[...]]>'.
static const unsigned long parse_escapes        = 1UL << 5;     //Not implemented.
static const unsigned long parse_trim_pcdata    = 1UL << 6;     //Trim '>...<'.
static const unsigned long parse_trim_attribute = 1UL << 7;     //Trim 'foo="..."'.
static const unsigned long parse_trim_cdata     = 1UL << 8;     //Trim '<![CDATA[...]]>' and/or '<![INCLUDE[...]]>'.
static const unsigned long parse_trim_entity    = 1UL << 9;     //Trim '<!ENTITY name ...>', etc.
static const unsigned long parse_trim_doctype   = 1UL << 10;    //Trim '<!DOCTYPE [...]>'.
static const unsigned long parse_trim_comment   = 1UL << 11;    //Trim '<!--...-->'.
static const unsigned long parse_wnorm          = 1UL << 12;    //Normalize all entities that are flagged to be trimmed.
static const unsigned long parse_dtd            = 1UL << 13;    //With parse_doctype: also parse the contents of the '[...]' data member.
static const unsigned long parse_dtd_only       = 1UL << 14;    //With parse_doctype|parse_dtd: parse only '<!DOCTYPE [*]>'.
static const unsigned long parse_default        = (1UL << 16) - 1UL; //All of the low sixteen bits.
static const unsigned long parse_noset          = 1UL << 31;
145 //<summary>An XML document tree node.</summary>
146 typedef struct t_xml_node_struct
148 t_xml_node_struct* parent; //Pointer to parent
149 TCHAR* name; //Pointer to element name.
150 #ifdef PUGOPT_NONSEG
151 unsigned int name_size; //Length of element name. Since 19 Jan 2003 NF.
152 #endif
153 bool name_insitu; //True if 'name' is a segment of the original parse string.
154 xml_node_type type; //Node type; see xml_node_type.
155 unsigned int attributes; //Count attributes.
156 unsigned int attribute_space; //Available pointer space in 'attribute'.
157 xml_attribute_struct** attribute; //Array of pointers to attributes; see xml_attribute_struct.
158 unsigned int children; //Count children in member 'child'.
159 unsigned int child_space; //Available pointer space in 'child'.
160 t_xml_node_struct** child; //Array of pointers to children.
161 TCHAR* value; //Pointer to any associated string data.
162 #ifdef PUGOPT_NONSEG
163 unsigned int value_size; //Length of element data. Since 19 Jan 2003 NF.
164 #endif
165 bool value_insitu; //True if 'data' is a segment of the original parse string.
167 xml_node_struct;
//<summary>Concatenate 'rhs' to '*lhs', growing '*lhs' as necessary.</summary>
//<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
//<param name="rhs">Zero-terminated source string.</param>
//<returns>True on success; false if an argument is null or 'realloc' failed.</returns>
//<remarks>
//  On failure '*lhs' is left exactly as it was. A null '*lhs' is treated as an
//  empty string, so the first call allocates.
//</remarks>
inline static bool strcatgrow(TCHAR** lhs,const TCHAR* rhs)
{
    if(!lhs || !rhs) return false; //Nothing to append to, or nothing to append.
    const size_t ulhs = (*lhs) ? _tcslen(*lhs) : 0; //Null destination counts as empty.
    const size_t urhs = _tcslen(rhs);
    //realloc(NULL,n) behaves like malloc(n), so one call covers first allocation and growth.
    TCHAR* temp = (TCHAR*) realloc(*lhs,(ulhs+urhs+1UL)*sizeof(TCHAR));
    if(!temp) return false; //Realloc failed; the original block (if any) is still valid.
    memcpy(temp+ulhs,rhs,urhs*sizeof(TCHAR)); //Concatenate.
    temp[ulhs+urhs] = 0; //Terminate it.
    *lhs = temp;
    return true;
}
//<summary>Tests whether 'c' may appear in a name.</summary>
//<returns>True if 'c' is alphanumeric, or one of '_', ':', '-', '.'.</returns>
//<remarks>
//  In narrow builds the character is passed to the classifier as an
//  'unsigned char' value; handing 'isalnum' a negative 'char' (any byte above
//  0x7F where 'char' is signed) is undefined behaviour.
//</remarks>
inline static bool chartype_symbol(TCHAR c)
{
    const int code = (sizeof(TCHAR) == 1) ? (int)(unsigned char)c : (int)c;
    return (_istalnum(code)||c==_T('_')||c==_T(':')||c==_T('-')||c==_T('.'));
}
195 inline static bool chartype_space(TCHAR c) //Character is greater than 0 or character is less than exclamation.
196 { return (c>0 && c<_T('!')); }
197 inline static bool chartype_enter(TCHAR c) //Character is '&lt;'.
198 { return (c==_T('<')); }
199 inline static bool chartype_leave(TCHAR c) //Character is '&gt;'.
200 { return (c==_T('>')); }
201 inline static bool chartype_close(TCHAR c) //Character is '/'.
202 { return (c==_T('/')); }
203 inline static bool chartype_equals(TCHAR c) //Character is '='.
204 { return (c==_T('=')); }
205 inline static bool chartype_special(TCHAR c) //Character is '!'.
206 { return (c==_T('!')); }
207 inline static bool chartype_pi(TCHAR c) //Character is '?'.
208 { return (c==_T('?')); }
209 inline static bool chartype_dash(TCHAR c) //Character is '-'.
210 { return (c==_T('-')); }
211 inline static bool chartype_quote(TCHAR c) //Character is &quot;&lsquo;&quot; -or- &lsquo;&quot;&lsquo;.
212 { return (c==_T('"')||c==_T('\'')); }
213 inline static bool chartype_lbracket(TCHAR c) //Character is '['.
214 { return (c==_T('[')); }
215 inline static bool chartype_rbracket(TCHAR c) //Character is ']'.
216 { return (c==_T(']')); }
219 #ifdef PUGOPT_NONSEG
222 //<summary>Concatenate 'rhs' to 'lhs', growing 'lhs' if neccessary.</summary>
223 //<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
224 //<param name="rhs">Source.</param>
225 //<param name="lsize">Specifies the length of *lhs in bytes and returns its new length.</param>
226 //<param name="rsize">Specifies the length of *rhs in bytes.</param>
227 //<returns>Success if 'realloc' was successful.</returns>
228 //<remarks>'lhs' is resized and 'rhs' is concatenated to it.</remarks>
229 inline static bool strcatgrown_impl(TCHAR** lhs,const TCHAR* rhs,unsigned int& lsize,unsigned int rsize)
231 if(!*lhs) //Null, allocate and copy.
233 *lhs = (TCHAR*) malloc(rsize+sizeof(TCHAR));
234 if(!*lhs)
236 lsize = 0;
237 return false; //Allocate failed.
239 memcpy(*lhs,rhs,rsize); //Concatenate.
240 *(*lhs + rsize) = 0; //Terminate it.
241 lsize = rsize;
243 else //Reallocate. NF I don't think this is right for MBCS, nor is code in 'StrCatGrow()'.
245 TCHAR* temp = (TCHAR*) realloc(*lhs,lsize + rsize + sizeof(TCHAR));
246 if(!temp) return false; //Realloc failed.
247 memcpy(temp+lsize,rhs,rsize); //Concatenate.
248 lsize += rsize; //Set new length.
249 temp[lsize] = 0; //Terminate it.
250 *lhs = temp;
252 return true;
255 //<summary>Concatenate 'rhs' to 'lhs', growing 'lhs' if neccessary.</summary>
256 //<param name="lhs">Pointer to pointer to receiving string. Note: If '*lhs' is not null, it must have been dynamically allocated using 'malloc'.</param>
257 //<param name="rhs">Source.</param>
258 //<param name="lsize">Specifies the length of *lhs in bytes and returns its new length.</param>
259 //<returns>Success if 'realloc' was successful.</returns>
260 //<remarks>'lhs' is resized and 'rhs' is concatenated to it.</remarks>
261 inline static bool strcatgrown(TCHAR** lhs,const TCHAR* rhs,unsigned int& lsize)
263 const unsigned int rsize = _tcslen(rhs) * sizeof(TCHAR);
264 return pug::strcatgrown_impl(lhs,rhs,lsize,rsize);
267 //<summary>Trim leading and trailing whitespace.</summary>
268 //<param name="s">Pointer to pointer to string.</param>
269 //<param name="len">Specifies the length of *s in bytes and returns its new length.</param>
270 //<returns>Success.</returns>
271 //<remarks>*s is modified to point to the first non-white character in the string.</remarks>
272 inline static bool strwtrim(TCHAR** s,unsigned int& len)
274 if(!s || !*s) return false;
275 TCHAR* pse = *s + len;
276 while(*s < pse && pug::chartype_space(**s)) //Find first non-white character.
277 ++*s; //As long as we hit whitespace, increment the string pointer.
278 for(; *s < --pse;) //As long as we hit whitespace, decrement.
280 if(!pug::chartype_space(*pse))
282 len = pse + 1 - *s;
283 break;
286 return true;
290 #else
293 //<summary>Trim leading and trailing whitespace.</summary>
294 //<param name="s">Pointer to pointer to string.</param>
295 //<returns>Success.</returns>
296 inline static bool strwtrim(TCHAR** s)
298 if(!s || !*s) return false;
299 while(**s > 0 && **s < _T('!')) ++*s; //As long as we hit whitespace, increment the string pointer.
300 const TCHAR* temp = *s;
301 while(0 != *temp++); //Find the terminating null.
302 long i, n = (long)(temp-*s-1);
303 --n; //Start from the last string TCHAR.
304 for(i=n; (i > -1) && (*s)[i] > 0 && (*s)[i] < _T('!'); --i); //As long as we hit whitespace, decrement.
305 if(i<n) (*s)[i+1] = 0; //Zero-terminate.
306 return true;
//<summary>
//  In situ trim leading and trailing whitespace, then convert every run of
//  consecutive whitespace to a single space TCHAR.
//</summary>
//<param name="s">Pointer to pointer to string.</param>
//<returns>False if 's' or '*s' is null, otherwise true.</returns>
//<remarks>
//  Whitespace is any code strictly between 0 and '!', the same test 'strwtrim'
//  uses, so negative (non-ASCII) narrow characters are copied through
//  unchanged. The result is never longer than the input, so it is compacted
//  directly in the caller's buffer without a temporary allocation; '*s' is
//  advanced past any leading whitespace.
//</remarks>
inline static bool strwnorm(TCHAR** s)
{
    if(!s || !*s) return false; //No string to normalize.
    while(**s > 0 && **s < _T('!')) ++(*s); //Skip leading whitespace.
    TCHAR* out = *s; //Write position; never overtakes the read position.
    bool pending = false; //A whitespace run has been seen since the last copied character.
    for(const TCHAR* in = *s; *in != 0; ++in)
    {
        if(*in > 0 && *in < _T('!'))
        {
            pending = true; //Emit nothing yet: a trailing run must vanish entirely.
            continue;
        }
        if(pending)
        {
            *out++ = _T(' '); //Collapse the run to one space.
            pending = false;
        }
        *out++ = *in;
    }
    *out = 0; //Zero-terminate; drops any trailing whitespace run.
    return true;
}
354 #endif
357 //<summary>Set structure string member to given value.</summary>
358 //<param name="dest">Pointer to pointer to destination.</param>
359 //<param name="src">Source.</param>
360 //<param name="insitu">Pointer to boolean in-situ string flag.</param>
361 //<returns>True if member was set to the new value.</returns>
362 //<remarks>
363 // If 'src' is larger than 'dest' then 'dest' is resized, in which case
364 // it is probably no longer in-situ,and 'in_situ' is set to false. If
365 // 'dest' is already no longer in-situ, and 'src' is too small then the
366 // existing memory pointed to is freed. If 'dest' is larger than or equal
367 // to 'dest' then it is merely copied with no resize.
368 //</remarks>
369 inline static bool strcpyinsitu
371 TCHAR** dest,
372 const TCHAR* src,
373 bool* insitu
374 #ifdef PUGOPT_NONSEG
376 unsigned int& destlen
377 #endif
380 if(!dest || !src || !insitu) return false; //Bad argument(s), so fail.
381 #ifndef PUGOPT_NONSEG //Always use heap for our r/o string.
382 size_t l = (*dest) ? _tcslen(*dest) : 0; //How long is destination?
383 if(l >= _tcslen(src)) //Destination is large enough, so just copy.
385 _tcscpy(*dest,src); //Copy.
386 return true; //Success.
388 else //Destination is too small.
389 #endif
391 if(*dest && !*insitu) free(*dest); //If destination is not in-situ, then free it.
392 *dest = NULL; //Mark destination as NULL, forcing 'StrCatGrow' to 'malloc.
393 #ifdef PUGOPT_NONSEG
394 if(strcatgrown(dest,src,destlen)) //Allocate & copy source to destination
395 #else
396 if(strcatgrow(dest,src)) //Allocate & copy source to destination
397 #endif
399 *insitu = false; //Mark as no longer being in-situ, so we can free it later.
400 return true; //Success.
403 return false; //Failure.
//<summary>Character set pattern match.</summary>
//<param name="src">Pointer to the pattern position just after the opening '['; advanced to the closing ']'.</param>
//<param name="dst">Pointer to the subject position; advanced by one character on a match.</param>
//<returns>1 if the subject character satisfies the set, otherwise 0.</returns>
//<remarks>
//  Used by 'strcmpwild'. A leading '!' negates the set and 'a-z' style ranges
//  are honoured. If the pattern ends before a closing ']' is found the set is
//  rejected: 0 is returned, '*dst' is not advanced, and '*src' is left on the
//  last pattern character so that the caller's subsequent increment lands on
//  the terminator rather than beyond it.
//</remarks>
inline int strcmpwild_cset(const TCHAR** src,const TCHAR** dst)
{
    int find = 0;
    int excl = 0;
    int star = 1; //1 until the first member is consumed, so a leading ']' counts as a member.
    if(**src == _T('!'))
    {
        excl = 1;
        ++(*src);
    }
    while(**src != _T(']') || star == 1)
    {
        if(**src == 0) //Unterminated set: never scan past the end of the pattern.
        {
            --(*src);
            return 0;
        }
        if(find == 0)
        {
            if(**src == _T('-') && *(*src-1) < *(*src+1) && *(*src+1) != _T(']') && star == 0)
            {
                if(**dst >= *(*src-1) && **dst <= *(*src+1))
                {
                    find = 1;
                    ++(*src); //Consume the upper bound of the range.
                }
            }
            else if(**src == **dst) find = 1;
        }
        ++(*src);
        star = 0;
    }
    if(excl == 1) find = (1 - find);
    if(find == 1) ++(*dst);
    return find;
}
444 inline int strcmpwild_impl(const TCHAR* src,const TCHAR* dst); //Forward declaration.
447 //<summary>Wildcard pattern match.</summary>
448 //<param name="lhs">String or expression for left-hand side of comparison.</param>
449 //<param name="rhs">String for right-hand side of comparison.</param>
450 //<remarks>Used by 'strcmpwild'.</remarks>
451 inline int strcmpwild_astr(const TCHAR** src,const TCHAR** dst)
453 int find = 1;
454 ++(*src);
455 while((**dst != 0 && **src == _T('?')) || **src == _T('*'))
457 if(**src == _T('?')) ++(*dst);
458 ++(*src);
460 while(**src == _T('*')) ++(*src);
461 if(**dst == 0 && **src != 0) return 0;
462 if(**dst == 0 && **src == 0) return 1;
463 else
465 if(strcmpwild_impl(*src,*dst) == 0)
469 ++(*dst);
470 while(**src != **dst && **src != _T('[') && **dst != 0)
471 ++(*dst);
473 while((**dst != 0) ? strcmpwild_impl(*src,*dst) == 0 : 0 != (find=0));
475 if(**dst == 0 && **src == 0) find = 1;
476 return find;
481 //<summary>Compare two strings, with globbing, and character sets.</summary>
482 //<param name="lhs">String or expression for left-hand side of comparison.</param>
483 //<param name="rhs">String for right-hand side of comparison.</param>
484 //<remarks>Used by 'strcmpwild'.</remarks>
485 inline int strcmpwild_impl(const TCHAR* src,const TCHAR* dst)
487 int find = 1;
488 for(; *src != 0 && find == 1 && *dst != 0; ++src)
490 switch(*src)
492 case _T('?'): ++dst; break;
493 case _T('['): ++src; find = strcmpwild_cset(&src,&dst); break;
494 case _T('*'): find = strcmpwild_astr(&src,&dst); --src; break;
495 default : find = (int) (*src == *dst); ++dst;
498 while(*src == _T('*') && find == 1) ++src;
499 return (int) (find == 1 && *dst == 0 && *src == 0);
502 //<summary>Compare two strings, with globbing, and character sets.</summary>
503 //<param name="lhs">String or expression for left-hand side of comparison.</param>
504 //<param name="rhs">String for right-hand side of comparison.</param>
505 //<returns>
506 // Returns 1 if src does not match dst, or -1 if either src or dst are null,
507 // or 0 if src matches dst.
508 //</returns>
509 //<remarks>
510 // Simple regular expressions are permitted in 'src': The character '*' matches
511 // zero or more characters up to the next pattern, or the end of the string. The
512 // '?' character matches any single character. Character sets and negation are
513 // also permitted, for example, '[abcd]', '[a-zA-Z]', etc.
514 //</remarks>
515 inline int strcmpwild(const TCHAR* src,const TCHAR* dst)
517 if(!src || !dst) return -1;
518 return (strcmpwild_impl(src,dst)==1)?0:1;
522 //<summary>Allocate & init an xml_attribute_struct structure.</summary>
523 //<returns>Pointer to new xml_attribute_struct structure.</returns>
524 inline static xml_attribute_struct* new_attribute(void)
526 xml_attribute_struct* p = (xml_attribute_struct*)malloc(sizeof(xml_attribute_struct)); //Allocate one attribute.
527 if(p) //If allocation succeeded.
529 p->name = p->value = 0; //No name or value.
530 #ifdef PUGOPT_NONSEG
531 p->name_size = p->value_size = 0; //Lengths of zero.
532 #endif
533 p->name_insitu = p->value_insitu = true; //Default to being in-situ of the parse string.
535 return p;
539 //<summary>Allocate & init an xml_node_struct structure.</summary>
540 //<param name="type">Desired node type.</param>
541 //<returns>Pointer to new xml_node_struct structure.</returns>
542 inline static xml_node_struct* new_node(xml_node_type type = node_element)
544 xml_node_struct* p = (xml_node_struct*)malloc(sizeof(xml_node_struct)); //Allocate one node.
545 if(p) //If allocation succeeded.
547 p->name = p->value = 0; //No name or data.
548 #ifdef PUGOPT_NONSEG
549 p->name_size = p->value_size = 0;
550 #endif
551 p->type = type; //Set the desired type.
552 p->attributes = p->children = 0; //No attributes or children.
553 p->name_insitu = p->value_insitu = true; //Default to being in-situ of the parse string.
556 type != node_document && //None of these will have attributes.
557 type != node_pcdata &&
558 type != node_cdata &&
559 type != node_include &&
560 type != node_comment
562 p->attribute = (xml_attribute_struct**)malloc(sizeof(xml_attribute_struct*)); //Allocate one attribute.
563 else p->attribute = NULL;
564 p->attribute_space = (p->attribute) ? 1 : 0;
567 type == node_element || //Only these will have children.
568 type == node_doctype ||
569 type == node_document
571 p->child = (xml_node_struct**)malloc(sizeof(xml_node_struct*)); //Allocate one child.
572 else p->child = NULL;
573 p->child_space = (p->child) ? 1 : 0;
575 return p;
579 //<summary>Allocate & append a new xml_node_struct onto the given parent.</summary>
580 //<param name="parent">Pointer to parent node.</param>
581 //<param name="grow">Pointer space growth increment.</param>
582 //<param name="type">Desired node type.</param>
583 //<returns>Pointer to new node.</returns>
584 //<remarks>Child pointer space of 'node' may be reallocated.</remarks>
585 inline static xml_node_struct* append_node(xml_node_struct* parent,long grow,xml_node_type type = node_element)
587 if(!parent) return NULL; //Must have a parent.
588 if(parent->children == parent->child_space) //Out of pointer space.
590 xml_node_struct** t = (xml_node_struct**)realloc(parent->child,sizeof(xml_node_struct*)*(parent->child_space+grow)); //Grow pointer space.
591 if(t) //Reallocation succeeded.
593 parent->child = t;
594 parent->child_space += grow; //Update the available space.
597 xml_node_struct* child = new_node(type); //Allocate a new child.
598 child->parent = parent; //Set it's parent pointer.
599 parent->child[parent->children] = child; //Set the parent's child pointer.
600 parent->children++; //One more child.
601 return child;
605 //<summary>Allocate & append a new attribute to the given xml_node_struct.</summary>
606 //<param name="node">Pointer to parent node.</param>
607 //<param name="grow">Pointer space growth increment.</param>
608 //<returns>Pointer to appended xml_attribute_struct.</returns>
609 //<remarks>Attribute pointer space of 'node' may be reallocated.</remarks>
610 inline static xml_attribute_struct* append_attribute(xml_node_struct* node,long grow)
612 if(!node) return NULL;
613 xml_attribute_struct* a = new_attribute();
614 if(!a) return NULL;
615 if(node->attributes == node->attribute_space) //Out of space, so grow.
617 xml_attribute_struct** t = (xml_attribute_struct**)realloc(node->attribute,sizeof(xml_node_struct*)*(node->attribute_space+grow));
618 if(t)
620 node->attribute = t;
621 node->attribute_space += grow;
624 node->attribute[node->attributes] = a;
625 node->attributes++;
626 return a;
630 //<summary>Non-recursively free a tree.</summary>
631 //<param name="root">
632 // Pointer to the root of the tree. Note: 'root' must have been dynamically
633 // allocated using 'malloc' or 'realloc', as 'free_node' tries to also free
634 // the structure pointed to by 'root'.
635 //</param>
636 //<remarks>'root' no longer points to a valid structure.</remarks>
637 inline static void free_node(xml_node_struct* node)
639 if(!node) return;
641 register xml_node_struct* cursor = node;
643 //Free all children of children.
646 LOC_STEP_INTO:
647 for(; cursor->children>0; --cursor->children) //Free each child in turn; 'children' keeps count while we jump around.
649 register xml_node_struct* t = cursor->child[cursor->children-1]; //Take a pointer to the child.
650 if(t && t->children) //If the child has children.
652 cursor = t; //Step in.
653 goto LOC_STEP_INTO; //Step into this node.
655 else if(t)
657 if(t->attributes) //Child has attributes.
659 register unsigned int n = t->attributes; //Free each attribute.
660 for(register unsigned int i=0; i<n; ++i)
662 if(t->attribute[i]->name && !t->attribute[i]->name_insitu)
663 free(t->attribute[i]->name);
664 if(t->attribute[i]->value && !t->attribute[i]->value_insitu)
665 free(t->attribute[i]->value);
666 free(t->attribute[i]);
669 if(t->attribute) free(t->attribute); //Free attribute pointer space.
670 if(t->child) free(t->child); //Free child pointer space.
671 if(t->name && !t->name_insitu) free(t->name);
672 if(t->value && !t->value_insitu) free(t->value);
673 free(t); //Free the child node.
676 cursor = cursor->parent; //Step out.
678 while(cursor->children); //While there are children.
679 //Finally, free the root's children & the root itself.
680 if(cursor->attributes)
682 register unsigned int n = cursor->attributes;
683 for(register unsigned int i=0; i<n; ++i)
685 if(cursor->attribute[i]->name && !cursor->attribute[i]->name_insitu)
686 free(cursor->attribute[i]->name);
687 if(cursor->attribute[i]->value && !cursor->attribute[i]->value_insitu)
688 free(cursor->attribute[i]->value);
689 free(cursor->attribute[i]);
692 if(cursor->attribute) free(cursor->attribute); //Free attribute pointer space.
693 if(cursor->child) free(cursor->child); //Free child pointer space.
694 if(cursor->name && !cursor->name_insitu) free(cursor->name); //Free name & data.
695 if(cursor->value && !cursor->value_insitu) free(cursor->value);
696 free(cursor); //Free the root itself.
699 //<summary>Recursively free a tree.</summary>
700 //<param name="root">Pointer to the root of the tree.</param>
701 //<remarks>Not used.</remarks>
702 inline static void free_node_recursive(xml_node_struct* root)
704 if(root)
706 unsigned int n = root->attributes;
707 register unsigned int i;
708 for(i=0; i<n; i++)
710 if(root->attribute[i]->name && !root->attribute[i]->name_insitu)
711 free(root->attribute[i]->name);
712 if(root->attribute[i]->value && !root->attribute[i]->value_insitu)
713 free(root->attribute[i]->value);
714 free(root->attribute[i]);
716 free(root->attribute);
717 n = root->children;
718 for(i=0; i<n; i++)
719 free_node_recursive(root->child[i]);
720 free(root->child);
721 if(root->name && !root->name_insitu) free(root->name);
722 if(root->value && !root->value_insitu) free(root->value);
723 free(root);
728 //<summary>Parser utilities.</summary>
729 #define SKIPWS() { while(chartype_space(*s)) ++s; if(*s==0) return s; }
730 #define OPTSET(OPT) ( optmsk & OPT )
731 #define PUSHNODE(TYPE) { cursor = append_node(cursor,growby,TYPE); }
732 #define POPNODE() { cursor = cursor->parent; }
733 #define SCANFOR(X) { while(*s!=0 && !(X)) ++s; if(*s==0) return s; }
734 #define SCANWHILE(X) { while((X)) ++s; if(*s==0) return s; }
735 #ifndef PUGOPT_NONSEG
736 # define ENDSEG() { ch = *s; *s = 0; ++s; if(*s==0) return s; }
737 #else
738 # define ENDSEG() { ch = *s; ++s; if(*s==0) return s; }
739 # define SETLEN() ( cursor->value_size = s - cursor->value )
740 # define ENDSEGDAT() { ch = *s; SETLEN(); ++s; if(*s==0) return s; }
741 # define ENDSEGNAM(S) { ch = *s; S->name_size = s - S->name; ++s; if(*s==0) return s; }
742 # define ENDSEGATT(S) { ch = *s; S->value_size = s - S->value; ++s; if(*s==0) return s; }
743 #endif
746 //<summary>Static single-pass in-situ parse the given xml string.</summary>
747 //<param name="s">Pointer to XML-formatted string.</param>
748 //<param name="root">Pointer to root.</param>
749 //<param name="grow">Pointer space growth increment.</param>
750 //<param name="optmsk">Parse options mask.</param>
751 //<returns>Last string position or null.</returns>
752 //<remarks>
753 // Input string is zero-segmented if 'PUGOPT_NONSEG' is not defined. Memory
754 // may have been allocated to 'root' (free with 'free_node').
755 //</remarks>
756 static TCHAR* parse(register TCHAR* s,xml_node_struct* xmldoc,long growby,unsigned long optmsk = parse_default)
758 if(!s || !xmldoc) return s;
759 TCHAR ch = 0; //Current char, in cases where we must null-terminate before we test.
760 xml_node_struct* cursor = xmldoc; //Tree node cursor.
761 TCHAR* mark = s; //Marked string position for temporary look-ahead.
762 while(*s!=0)
764 LOC_SEARCH: //Obliviously search for next element.
765 SCANFOR(chartype_enter(*s)); //Find the next '<'.
766 if(chartype_enter(*s))
768 ++s;
769 LOC_CLASSIFY: //What kind of element?
770 if(chartype_pi(*s)) //'<?...'
772 ++s;
773 if(chartype_symbol(*s) && OPTSET(parse_pi))
775 mark = s;
776 SCANFOR(chartype_pi(*s)); //Look for terminating '?'.
777 #ifndef PUGOPT_NONSEG
778 if(chartype_pi(*s)) *s = _T('/'); //Same semantics as for '<.../>', so fudge it.
779 #endif
780 s = mark;
781 PUSHNODE(node_pi); //Append a new node on the tree.
782 goto LOC_ELEMENT; //Go read the element name.
784 else //Bad PI or parse_pi not set.
786 SCANFOR(chartype_leave(*s)); //Look for '>'.
787 ++s;
788 mark = 0;
789 continue;
792 else if(chartype_special(*s)) //'<!...'
794 ++s;
795 if(chartype_dash(*s)) //'<!-...'
797 ++s;
798 if(OPTSET(parse_comments) && chartype_dash(*s)) //'<!--...'
800 ++s;
801 PUSHNODE(node_comment); //Append a new node on the tree.
802 cursor->value = s; //Save the offset.
803 while(*s!=0 && *(s+1) && *(s+2) && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'.
804 if(*s==0) return s;
805 #ifdef PUGOPT_NONSEG
806 SETLEN(); //NF 19 Jan 2003.
807 #else
808 *s = 0; //Zero-terminate this segment at the first terminating '-'.
809 #endif
810 if(OPTSET(parse_trim_comment)) //Trim whitespace.
812 #ifdef PUGOPT_NONSEG
813 strwtrim(&cursor->value,cursor->value_size);
814 #else
815 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
816 else strwtrim(&cursor->value);
817 #endif
819 s += 2; //Step over the '\0-'.
820 POPNODE(); //Pop since this is a standalone.
821 goto LOC_LEAVE; //Look for any following PCDATA.
823 else
825 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'.
826 if(*s==0) return s;
827 s += 2;
828 goto LOC_LEAVE; //Look for any following PCDATA.
831 else if(chartype_lbracket(*s)) //'<![...'
833 ++s;
834 if(*s==_T('I')) //'<![I...'
836 ++s;
837 if(*s==_T('N')) //'<![IN...'
839 ++s;
840 if(*s==_T('C')) //'<![INC...'
842 ++s;
843 if(*s==_T('L')) //'<![INCL...'
845 ++s;
846 if(*s==_T('U')) //'<![INCLU...'
848 ++s;
849 if(*s==_T('D')) //'<![INCLUD...'
851 ++s;
852 if(*s==_T('E')) //'<![INCLUDE...'
854 ++s;
855 if(chartype_lbracket(*s)) //'<![INCLUDE[...'
857 ++s;
858 if(OPTSET(node_cdata))
860 PUSHNODE(node_include); //Append a new node on the tree.
861 cursor->value = s; //Save the offset.
862 while(!(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
863 if(chartype_rbracket(*s))
865 #ifdef PUGOPT_NONSEG
866 SETLEN(); //NF 19 Jan 2003.
867 #else
868 *s = 0; //Zero-terminate this segment.
869 #endif
870 ++s;
871 if(OPTSET(parse_trim_cdata)) //Trim whitespace.
873 #ifdef PUGOPT_NONSEG
874 strwtrim(&cursor->value, cursor->value_size);
875 #else
876 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
877 else strwtrim(&cursor->value);
878 #endif
881 POPNODE(); //Pop since this is a standalone.
883 else //Flagged for discard, but we still have to scan for the terminator.
885 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
886 ++s;
888 ++s; //Step over the last ']'.
889 goto LOC_LEAVE; //Look for any following PCDATA.
898 else if(*s==_T('C')) //'<![C...'
900 ++s;
901 if(*s==_T('D')) //'<![CD...'
903 ++s;
904 if(*s==_T('A')) //'<![CDA...'
906 ++s;
907 if(*s==_T('T')) //'<![CDAT...'
909 ++s;
910 if(*s==_T('A')) //'<![CDATA...'
912 ++s;
913 if(chartype_lbracket(*s)) //'<![CDATA[...'
915 ++s;
916 if(OPTSET(parse_cdata))
918 PUSHNODE(node_cdata); //Append a new node on the tree.
919 cursor->value = s; //Save the offset.
920 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
921 if(*(s+2)==0) return s; //Very badly formed.
922 if(chartype_rbracket(*s))
924 #ifdef PUGOPT_NONSEG
925 SETLEN(); //NF 19 Jan 2003.
926 #else
927 *s = 0; //Zero-terminate this segment.
928 #endif
929 ++s;
930 if(OPTSET(parse_trim_cdata)) //Trim whitespace.
932 #ifdef PUGOPT_NONSEG
933 strwtrim(&cursor->value,cursor->value_size);
934 #else
935 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
936 else strwtrim(&cursor->value);
937 #endif
940 POPNODE(); //Pop since this is a standalone.
942 else //Flagged for discard, but we still have to scan for the terminator.
944 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
945 ++s;
947 ++s; //Step over the last ']'.
948 goto LOC_LEAVE; //Look for any following PCDATA.
955 continue; //Probably a corrupted CDATA section, so just eat it.
957 else if(*s==_T('D')) //'<!D...'
959 ++s;
960 if(*s==_T('O')) //'<!DO...'
962 ++s;
963 if(*s==_T('C')) //'<!DOC...'
965 ++s;
966 if(*s==_T('T')) //'<!DOCT...'
968 ++s;
969 if(*s==_T('Y')) //'<!DOCTY...'
971 ++s;
972 if(*s==_T('P')) //'<!DOCTYP...'
974 ++s;
975 if(*s==_T('E')) //'<!DOCTYPE...'
977 ++s;
978 SKIPWS(); //Eat any whitespace.
979 xml_attribute_struct* a = 0;
980 if(OPTSET(parse_doctype))
982 PUSHNODE(node_doctype); //Append a new node on the tree.
983 a = append_attribute(cursor,3); //Store the DOCTYPE name.
984 a->value = a->name = s; //Save the offset.
986 SCANWHILE(chartype_symbol(*s)); //'<!DOCTYPE symbol...'
987 #ifdef PUGOPT_NONSEG
988 if(OPTSET(parse_doctype))
989 a->name_size = a->value_size = s - a->value; //Save the length. rem: Before ENDSEG()
990 #endif
991 ENDSEG(); //Save char in 'ch', terminate & step over.
992 if(chartype_space(ch)) SKIPWS(); //Eat any whitespace.
993 LOC_DOCTYPE_SYMBOL:
994 if(chartype_symbol(*s))
996 mark = s;
997 SCANWHILE(chartype_symbol(*s)); //'...symbol SYSTEM...'
998 if(OPTSET(parse_doctype))
1000 a = append_attribute(cursor,1);
1001 a->value = a->name = mark;
1002 #ifdef PUGOPT_NONSEG
1003 a->value_size = a->name_size = s - mark; //NF 19 Jan 2003.
1004 #else
1005 *s = 0;
1006 #endif
1008 ++s;
1009 SKIPWS();
1011 if(chartype_quote(*s)) //'...SYSTEM "..."'
1013 LOC_DOCTYPE_QUOTE:
1014 ch = *s;
1015 ++s;
1016 mark = s;
1017 while(*s!=0 && *s != ch) ++s;
1018 if(*s!=0)
1020 if(OPTSET(parse_doctype))
1022 a = append_attribute(cursor,1);
1023 a->value = mark;
1024 #ifdef PUGOPT_NONSEG
1025 a->value_size = s - mark; //NF 19 Jan 2003.
1026 #else
1027 *s = 0;
1028 #endif
1030 ++s;
1031 SKIPWS(); //Eat whitespace.
1032 if(chartype_quote(*s)) goto LOC_DOCTYPE_QUOTE; //Another quoted section to store.
1033 else if(chartype_symbol(*s)) goto LOC_DOCTYPE_SYMBOL; //Not wellformed, but just parse it.
1036 if(chartype_lbracket(*s)) //'...[...'
1038 ++s; //Step over the bracket.
1039 if(OPTSET(parse_doctype)) cursor->value = s; //Store the offset.
1040 unsigned int bd = 1; //Bracket depth counter.
1041 while(*s!=0) //Loop till we're out of all brackets.
1043 if(chartype_rbracket(*s)) --bd;
1044 else if(chartype_lbracket(*s)) ++bd;
1045 if(bd == 0) break;
1046 ++s;
1048 //Note: 's' now points to end of DTD, i.e.: ']'.
1049 if(OPTSET(parse_doctype))
1051 //Note: If we aren't parsing the DTD ('!parse_dtd', etc.) then it is stored in the DOM as one whole chunk.
1052 #ifdef PUGOPT_NONSEG
1053 SETLEN(); //NF 19 Jan 2003
1054 #else
1055 *s = 0; //Zero-terminate.
1056 #endif
1057 if(OPTSET(parse_dtd)||OPTSET(parse_dtd_only))
1059 if(OPTSET(parse_dtd))
1061 #ifdef PUGOPT_NONSEG
1062 TCHAR svch = *s;
1065 *s = 0; //Zero-terminate.
1066 parse(cursor->value,cursor,growby,optmsk); //Parse it.
1068 catch(...){ assert(false); }
1069 *s = svch;
1070 #else
1071 parse(cursor->value,cursor,growby,optmsk); //Parse it.
1072 #endif
1074 if(OPTSET(parse_dtd_only)) return (s+1); //Flagged to parse DTD only, so leave here.
1076 else if(OPTSET(parse_trim_doctype)) //Trim whitespace.
1078 #ifdef PUGOPT_NONSEG
1079 strwtrim(&cursor->value, cursor->value_size);
1080 #else
1081 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
1082 else strwtrim(&cursor->value);
1083 #endif
1085 ++s; //Step over the zero.
1086 POPNODE(); //Pop since this is a standalone.
1088 SCANFOR(chartype_leave(*s));
1089 continue;
1091 //Fall-through; make sure we pop.
1092 POPNODE(); //Pop since this is a standalone.
1093 continue;
1101 else if(chartype_symbol(*s)) //An inline DTD tag.
1103 mark = s;
1104 SCANWHILE(chartype_symbol(*s));
1105 ENDSEG(); //Save char in 'ch', terminate & step over.
1106 xml_node_type e = node_dtd_entity;
1107 #ifdef PUGOPT_NONSEG
1108 const unsigned int dtdilen = (s - 1) - mark;
1109 if(_tcsncmp(mark,_T("ATTLIST"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_attlist;
1110 else if(_tcsncmp(mark,_T("ELEMENT"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_element;
1111 else if(_tcsncmp(mark,_T("NOTATION"),max((8*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_notation;
1112 #else
1113 if(_tcscmp(mark,_T("ATTLIST"))==0) e = node_dtd_attlist;
1114 else if(_tcscmp(mark,_T("ELEMENT"))==0) e = node_dtd_element;
1115 else if(_tcscmp(mark,_T("NOTATION"))==0) e = node_dtd_notation;
1116 #endif
1117 PUSHNODE(e); //Append a new node on the tree.
1118 if(*s!=0 && chartype_space(ch))
1120 SKIPWS(); //Eat whitespace.
1121 if(chartype_symbol(*s) || *s==_T('%'))
1123 mark = s;
1124 if(*s==_T('%')) //Could be '<!ENTITY % name' -or- '<!ENTITY %name'
1126 #ifdef PUGOPT_NONSEG
1127 //Note: For memory-mapped file support we need to treat 's' as read-only so we can't do '*(s-1) = _T('%');' below.
1128 cursor->name = mark; //Sort out extraneous whitespace when we retrieve it. TODO: Whitespace cleanup.
1129 #endif
1130 ++s;
1131 if(chartype_space(*s))
1133 SKIPWS(); //Eat whitespace.
1134 #ifndef PUGOPT_NONSEG
1135 *(s-1) = _T('%');
1136 cursor->name = (s-1);
1137 #endif
1139 #ifndef PUGOPT_NONSEG
1140 else cursor->name = mark;
1141 #endif
1143 else cursor->name = s;
1144 SCANWHILE(chartype_symbol(*s));
1145 #ifdef PUGOPT_NONSEG
1146 cursor->name_size = s - cursor->name;
1147 #endif
1148 ENDSEG(); //Save char in 'ch', terminate & step over.
1149 if(chartype_space(ch))
1151 SKIPWS(); //Eat whitespace.
1152 if(e == node_dtd_entity) //Special case; may have multiple quoted sections w/anything inside.
1154 cursor->value = s; //Just store everything here.
1155 bool qq = false; //Quote in/out flag.
1156 while(*s != 0) //Loop till we find the right sequence.
1158 if(!qq && chartype_quote(*s)){ ch = *s; qq = true; }
1159 else if(qq && *s == ch) qq = false;
1160 else if(!qq && chartype_leave(*s)) //Not in quoted reqion and '>' hit.
1162 #ifdef PUGOPT_NONSEG
1163 SETLEN(); //NF 19 Jan 2003.
1164 #else
1165 *s = 0;
1166 #endif
1167 ++s;
1168 if(OPTSET(parse_trim_entity))
1170 #ifdef PUGOPT_NONSEG
1171 strwtrim(&cursor->value,cursor->value_size);
1172 #else
1173 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
1174 else strwtrim(&cursor->value);
1175 #endif
1177 POPNODE();
1178 goto LOC_SEARCH;
1180 ++s;
1182 if(OPTSET(parse_trim_entity))
1184 #ifdef PUGOPT_NONSEG
1185 strwtrim(&cursor->value, cursor->value_size);
1186 #else
1187 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
1188 else strwtrim(&cursor->value);
1189 #endif
1192 else
1194 cursor->value = s;
1195 SCANFOR(chartype_leave(*s)); //Just look for '>'.
1196 #ifdef PUGOPT_NONSEG
1197 SETLEN(); //NF 19 Jan 2003.
1198 #else
1199 *s = 0;
1200 #endif
1201 ++s;
1202 if(OPTSET(parse_trim_entity))
1204 #ifdef PUGOPT_NONSEG
1205 strwtrim(&cursor->value, cursor->value_size);
1206 #else
1207 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
1208 else strwtrim(&cursor->value);
1209 #endif
1211 POPNODE();
1212 goto LOC_SEARCH;
1217 POPNODE();
1220 else if(chartype_symbol(*s)) //'<#...'
1222 cursor = append_node(cursor,growby); //Append a new node to the tree.
1223 LOC_ELEMENT: //Scan for & store element name.
1224 cursor->name = s;
1225 SCANWHILE(chartype_symbol(*s)); //Scan for a terminator.
1226 #ifdef PUGOPT_NONSEG
1227 cursor->name_size = s - cursor->name; //Note: Before ENDSEG().
1228 #endif
1229 ENDSEG(); //Save char in 'ch', terminate & step over.
1232 *s!=0 &&
1234 chartype_close(ch) //'</...'
1235 #ifdef PUGOPT_NONSEG
1236 //||
1237 //chartype_pi(ch) //Treat '?>' as '/>' NF 19 Jan 2003
1238 #endif
1242 SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over the tag name.
1243 POPNODE(); //Pop.
1244 continue;
1246 else if(*s!=0 && !chartype_space(ch)) goto LOC_PCDATA; //No attributes, so scan for PCDATA.
1247 else if(*s!=0 && chartype_space(ch))
1249 SKIPWS(); //Eat any whitespace.
1250 LOC_ATTRIBUTE:
1251 if(chartype_symbol(*s)) //<... #...
1253 xml_attribute_struct* a = append_attribute(cursor,growby); //Make space for this attribute.
1254 a->name = s; //Save the offset.
1255 SCANWHILE(chartype_symbol(*s)); //Scan for a terminator.
1256 #ifdef PUGOPT_NONSEG
1257 ENDSEGNAM(a);
1258 #else
1259 ENDSEG(); //Save char in 'ch', terminate & step over.
1260 #endif
1261 if(*s!=0 && chartype_space(ch)) SKIPWS(); //Eat any whitespace.
1262 if(*s!=0 && (chartype_equals(ch) || chartype_equals(*s))) //'<... #=...'
1264 if(chartype_equals(*s)) ++s;
1265 SKIPWS(); //Eat any whitespace.
1266 if(chartype_quote(*s)) //'<... #="...'
1268 ch = *s; //Save quote char to avoid breaking on "''" -or- '""'.
1269 ++s; //Step over the quote.
1270 a->value = s; //Save the offset.
1271 SCANFOR(*s == ch); //Scan for the terminating quote, or '>'.
1272 #ifdef PUGOPT_NONSEG
1273 ENDSEGATT(a);
1274 #else
1275 ENDSEG(); //Save char in 'ch', terminate & step over.
1276 #endif
1277 if(OPTSET(parse_trim_attribute)) //Trim whitespace.
1279 #ifdef PUGOPT_NONSEG
1280 strwtrim(&a->value,a->value_size);
1281 #else
1282 if(OPTSET(parse_wnorm)) strwnorm(&a->value);
1283 else strwtrim(&a->value);
1284 #endif
1286 if(chartype_leave(*s)){ ++s; goto LOC_PCDATA; }
1287 else if(chartype_close(*s))
1289 ++s;
1290 POPNODE();
1291 SKIPWS(); //Eat any whitespace.
1292 if(chartype_leave(*s)) ++s;
1293 goto LOC_PCDATA;
1295 if(chartype_space(*s)) //This may indicate a following attribute.
1297 SKIPWS(); //Eat any whitespace.
1298 goto LOC_ATTRIBUTE; //Go scan for additional attributes.
1302 if(chartype_symbol(*s)) goto LOC_ATTRIBUTE;
1303 else if(*s!=0 && cursor->type == node_pi)
1305 #ifdef PUGOPT_NONSEG
1306 SCANFOR(chartype_pi(*s)); //compliments change where we don't fudge to '/>' when we find the PI. NF 20 Jan 2003
1307 SKIPWS(); //Eat any whitespace.
1308 if(chartype_pi(*s)) ++s;
1309 #else
1310 SCANFOR(chartype_close(*s));
1311 SKIPWS(); //Eat any whitespace.
1312 if(chartype_close(*s)) ++s;
1313 #endif
1314 SKIPWS(); //Eat any whitespace.
1315 if(chartype_leave(*s)) ++s;
1316 POPNODE();
1317 goto LOC_PCDATA;
1321 LOC_LEAVE:
1322 if(chartype_leave(*s)) //'...>'
1324 ++s; //Step over the '>'.
1325 LOC_PCDATA: //'>...<'
1326 mark = s; //Save this offset while searching for a terminator.
1327 SKIPWS(); //Eat whitespace if no genuine PCDATA here.
1328 if(chartype_enter(*s)) //We hit a '<...', with only whitespace, so don't bother storing anything.
1330 if(chartype_close(*(s+1))) //'</...'
1332 SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over any end-tag name.
1333 POPNODE(); //Pop.
1334 continue; //Continue scanning.
1336 else goto LOC_SEARCH; //Expect a new element enter, so go scan for it.
1338 s = mark; //We hit something other than whitespace; restore the original offset.
1339 PUSHNODE(node_pcdata); //Append a new node on the tree.
1340 cursor->value = s; //Save the offset.
1341 SCANFOR(chartype_enter(*s)); //'...<'
1342 #ifdef PUGOPT_NONSEG
1343 ENDSEGDAT();
1344 #else
1345 ENDSEG(); //Save char in 'ch', terminate & step over.
1346 #endif
1347 if(OPTSET(parse_trim_pcdata)) //Trim whitespace.
1349 #ifdef PUGOPT_NONSEG
1350 strwtrim(&cursor->value,cursor->value_size);
1351 #else
1352 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
1353 else strwtrim(&cursor->value);
1354 #endif
1356 POPNODE(); //Pop since this is a standalone.
1357 if(chartype_enter(ch)) //Did we hit a '<...'?
1359 if(chartype_close(*s)) //'</...'
1361 SCANFOR(chartype_leave(*s)); //'...>'
1362 POPNODE(); //Pop.
1363 goto LOC_LEAVE;
1365 else if(chartype_special(*s)) goto LOC_CLASSIFY; //We hit a '<!...'. We must test this here if we want comments intermixed w/PCDATA.
1366 else if(*s) goto LOC_CLASSIFY;
1367 else return s;
1370 //Fall-through A.
1371 else if(chartype_close(*s)) //'.../'
1373 ++s;
1374 if(chartype_leave(*s)) //'.../>'
1376 POPNODE(); //Pop.
1377 ++s;
1378 continue;
1382 //Fall-through B.
1383 else if(chartype_close(*s)) //'.../'
1385 SCANFOR(chartype_leave(*s)); //'.../>'
1386 POPNODE(); //Pop.
1387 continue;
1391 return s;
1396 //<summary>Read data from the file at 'path' into the buffer. Free with 'free'.</summary>
1397 //<param name="path">File path.</param>
1398 //<param name="buffer">Pointer to pointer to string to recieve buffer.</param>
1399 //<param name="size">Pointer to count bytes read and stored in 'buffer'.</param>
1400 //<param name="tempsize">Temporary read buffer size.</param>
1401 //<returns>Success if file at 'path' was opened and bytes were read into memory.</returns>
1402 //<remarks>Memory is allocated at '*buffer'. Free with 'free'.</remarks>
1403 inline static bool load_file(const TCHAR* path,TCHAR** buffer,unsigned long* size,unsigned long tempsize = 4096)
1405 if(!path || !buffer || !size) return false;
1406 *size = 0;
1407 *buffer = 0;
1408 HANDLE file_handle = CreateFile(path,GENERIC_READ,FILE_SHARE_READ,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
1409 if(file_handle == INVALID_HANDLE_VALUE) return false;
1410 TCHAR* temp = (TCHAR*) malloc(sizeof(TCHAR)*tempsize);
1411 if(!temp) return false;
1412 unsigned long read_bytes = 0;
1413 ZeroMemory(temp,sizeof(TCHAR)*tempsize);
1414 while(ReadFile(file_handle,(void*)temp,tempsize-1,&read_bytes,0) && read_bytes && strcatgrow(buffer,temp))
1416 *size += read_bytes;
1417 ZeroMemory(temp,sizeof(TCHAR)*tempsize);
1419 CloseHandle(file_handle);
1420 free(temp);
1421 return (*size) ? true : false;
//<summary>A void pointer array. Used by various xml_node::find* functions.</summary>
//<remarks>
//	The array stores the pointers only; it never frees what they point to.
//	Copying an array duplicates the pointer list, so every copy owns its own
//	storage.
//</remarks>
class pointer_array
{
//Internal Data Members
protected:
	unsigned int	_size; //Count items.
	unsigned int	_room; //Available space.
	void**			_data; //The list.
	unsigned int	_grow; //Grow by increment (never 0).
private:
	//Replace the contents with a copy of 'r'. Leaves this array untouched on allocation failure.
	bool copy_from(const pointer_array& r)
	{
		const unsigned int grow = r._grow ? r._grow : 1;
		const unsigned int room = (r._size > grow) ? r._size : grow;
		void** temp = (void**)malloc(sizeof(void*)*room);
		if(!temp) return false;
		for(unsigned int i=0; i<r._size; ++i) temp[i] = r._data[i];
		if(_data) free(_data);
		_data = temp;
		_room = room;
		_size = r._size;
		_grow = grow;
		return true;
	}
public:
	//<summary>Default constructor.</summary>
	//<param name="grow">Array growth increment. A value of 0 is treated as 1.</param>
	pointer_array(unsigned int grow = 4):
		_size(0),
		_room(0),
		_data(NULL),
		_grow(grow ? grow : 1) //A zero increment could never make room for a new element.
	{
		_data = (void**)malloc(sizeof(void*)*_grow);
		_room = (_data) ? _grow : 0;
	}
	//<summary>Copy constructor. Duplicates the pointer list instead of sharing it.</summary>
	pointer_array(const pointer_array& r):
		_size(0),
		_room(0),
		_data(NULL),
		_grow(r._grow ? r._grow : 1)
	{
		copy_from(r);
	}
	//<summary>Assignment. Duplicates the pointer list instead of sharing it.</summary>
	pointer_array& operator=(const pointer_array& r)
	{
		if(this != &r) copy_from(r);
		return *this;
	}
	//Virtual, because the class has virtual members and may be destroyed through a base pointer.
	virtual ~pointer_array(){ if(_data) free(_data); }
public:
	bool empty(){ return (_size == 0); } //True if there is no data in the array.
	void remove_all(){ _size = 0; } //Remove all data elements from the array.
	void clear() //Shrink back to the first growth increment and mark the array as empty.
	{
		if(_data)
		{
			void** temp = (void**)realloc(_data,sizeof(void*)*_grow); //Reallocate to first growth increment.
			if(temp) //On failure the old, larger block is still valid, so keep it and its '_room'.
			{
				_data = temp;
				_room = _grow; //Mark it as such.
			}
			_size = 0; //Mark array as empty.
		}
	}
	virtual void*& operator[](unsigned int i) //Access element at subscript, or dummy value if overflow.
	{
		static void* dummy = 0;
		if(i < _size) return _data[i];
		dummy = 0; //Forget anything written through an earlier out-of-range reference.
		return dummy;
	}
	unsigned int size(){ return _size; } //Count data elements in the array.
	virtual void* at(unsigned int i){ if(i < _size) return _data[i]; else return NULL; } //Access element at subscript, or NULL if overflow.
	long push_back(void* element) //Append a new element to the array. Returns its subscript, or -1 on failure.
	{
		if(!_data) return -1; //Fail if no array.
		if(_size >= _room) //Not enough room.
		{
			void** temp = (void**)realloc(_data,sizeof(void*)*(_room+_grow)); //Grow the array.
			if(!temp) return -1; //Reallocation failed; the existing contents are untouched.
			_room += _grow; //Increment available space.
			_data = temp; //Assign reallocated value to array pointer.
		}
		_data[_size] = element; //Set the element to be added.
		_size++; //Increment our count of elements.
		return _size-1; //Return the element's subscript.
	}
};
1495 //<summary>A simple indentation stack.</summary>
1496 //<remarks>Used by xml_node::outer_xml function.</remarks>
1497 class indent_stack
1499 //Internal Data Members
1500 protected:
1501 TCHAR _inch; //The indent character.
1502 TCHAR* _stac; //The aggregate indent string (stack).
1503 int _size; //Current depth (avoids using '_tcslen' on push/pop).
1504 //Construction/Destruction
1505 public:
1506 //<summary>Default constructor.</summary>
1507 //<param name="c">Indent character.</param>
1508 indent_stack(TCHAR c = _T('\t')):
1509 _inch(c),
1510 _stac(0) ,
1511 _size(0)
1513 _stac = (TCHAR*)malloc(sizeof(TCHAR)); //Allocate.
1514 *_stac = 0; //Zero-terminate.
1516 //Destructor.
1517 virtual ~indent_stack(){ if(_stac) free(_stac); }
1518 //Stack Operators
1519 public:
1520 //<summary>Grow indent string by one indent character.</summary>
1521 //<remarks>Reallocates the indent string.</remarks>
1522 void push()
1524 if(_inch && _stac)
1526 _size++;
1527 _stac = (TCHAR*)realloc(_stac,sizeof(TCHAR)*(_size+1));
1528 _stac[_size-1] = _inch;
1529 _stac[_size] = 0;
1532 //<summary>Shrink the indent string by one indent character.</summary>
1533 void pop()
1535 if(_inch && _stac && _size > 0)
1537 _size--;
1538 _stac = (TCHAR*)realloc(_stac,sizeof(TCHAR)*(_size+1));
1539 _stac[_size] = 0;
1542 //<summary>Accesses the indent depth.</summary>
1543 //<returns>The current indent string, or "" if empty.</returns>
1544 const TCHAR* depth(){ return (_inch && _stac) ? _stac : _T(""); }
1548 //<summary>
1549 // Stream output. Recursively writes the given xml_node_struct structure to
1550 // the given stream. NOTE: Use this recursive implementation for debug purposes
1551 // only, since a large tree may cause a stack overflow.
1552 //</summary>
1553 //<param name="os">Reference to output stream.</param>
1554 //<param name="indent">Reference to indentation stack.</param>
1555 //<param name="node">Pointer to the node.</param>
1556 //<param name="breaks">Use linebreaks?</param>
1557 //<returns>
1558 // String data is written to stream. Indent stack may be altered.
1559 // If you want to make this prettier, and to avoid propagating whitespace,
1560 // you will have to trim excess whitespace from the PCDATA sections.
1561 //</returns>
1562 inline static void outer_xml(std::basic_ostream<TCHAR,std::char_traits<TCHAR> > & os,indent_stack& indent,xml_node_struct* node,bool breaks = true)
1564 if(node && os.good()) //There is a node and ostream is OK.
1566 register unsigned int n, i;
1567 os << indent.depth();
1568 switch(node->type)
1570 case node_dtd_attlist:
1571 if(node->name)
1573 #ifdef PUGOPT_NONSEG
1574 os << _T("<!ATTLIST ");
1575 os.write( node->name, node->name_size );
1576 #else
1577 os << _T("<!ATTLIST ") << node->name;
1578 #endif
1579 if(node->value)
1580 #ifdef PUGOPT_NONSEG
1582 os << _T(" ");
1583 os.write( node->value, node->value_size );
1585 #else
1586 os << _T(" ") << node->value;
1587 #endif
1589 os << _T(">");
1591 break;
1592 case node_dtd_element:
1593 if(node->name)
1595 #ifdef PUGOPT_NONSEG
1596 os << _T("<!ELEMENT ");
1597 os.write( node->name, node->name_size );
1598 if(node->value)
1600 os << _T(" ");
1601 os.write( node->value, node->value_size );
1603 #else
1604 os << _T("<!ELEMENT ") << node->name;
1605 if(node->value) os << _T(" ") << node->value;
1606 #endif
1607 os << _T(">");
1609 break;
1610 case node_dtd_entity:
1611 if(node->name)
1613 #ifdef PUGOPT_NONSEG
1614 os << _T("<!ENTITY ");
1615 os.write( node->name, node->name_size );
1616 if(node->value)
1618 os << _T(" ");
1619 os.write( node->value, node->value_size );
1621 #else
1622 os << _T("<!ENTITY ") << node->name;
1623 if(node->value) os << _T(" ") << node->value;
1624 #endif
1625 os << _T(">");
1627 break;
1628 case node_dtd_notation:
1629 if(node->name)
1631 #ifdef PUGOPT_NONSEG
1632 os << _T("<!NOTATION ");
1633 os.write( node->name, node->name_size );
1634 if(node->value)
1636 os << _T(" ");
1637 os.write( node->value, node->value_size );
1639 #else
1640 os << _T("<!NOTATION ") << node->name;
1641 if(node->value) os << _T(" ") << node->value;
1642 #endif
1643 os << _T(">");
1645 break;
1646 case node_doctype:
1647 os << _T("<!DOCTYPE");
1648 n = node->attributes;
1649 for(i=0; i<n; ++i)
1651 os << _T(" ");
1652 if(node->attribute[i]->name)
1653 #ifdef PUGOPT_NONSEG
1654 os.write( node->attribute[i]->name, node->attribute[i]->name_size );
1655 #else
1656 os << node->attribute[i]->name;
1657 #endif
1658 else if(node->attribute[i]->value)
1659 #ifdef PUGOPT_NONSEG
1661 os << _T("\"");
1662 os.write( node->attribute[i]->value, node->attribute[i]->value_size );
1663 os << _T("\"");
1665 #else
1666 os << _T("\"") << node->attribute[i]->value << _T("\"");
1667 #endif
1669 if(node->children)
1671 if(breaks) os << std::endl;
1672 else os << _T(" ");
1673 os << _T("[");
1674 if(breaks) os << std::endl;
1675 else os << _T(" ");
1676 n = node->children;
1677 indent.push(); //Push the indent stack.
1678 for(i=0; i<n; ++i)
1682 node->child[i] && //There is a child at i.
1684 node->child[i]->type == node_dtd_attlist || //Skip all other types.
1685 node->child[i]->type == node_dtd_element ||
1686 node->child[i]->type == node_dtd_entity ||
1687 node->child[i]->type == node_dtd_notation
1690 outer_xml(os,indent,node->child[i],breaks);
1692 indent.pop(); //Pop the indent stack.
1693 os << _T("]");
1695 else if(node->value)
1696 #ifdef PUGOPT_NONSEG
1698 os << _T(" [");
1699 os.write(node->value,node->value_size);
1700 os << _T("]");
1702 #else
1703 os << _T(" [") << node->value << _T("]");
1704 #endif
1705 os << _T(">");
1706 break;
1707 case node_pcdata:
1708 #ifdef PUGOPT_NONSEG
1709 if(node->value) os.write(node->value,node->value_size);
1710 #else
1711 if(node->value) os << node->value;
1712 #endif
1713 break;
1714 case node_cdata:
1715 #ifdef PUGOPT_NONSEG
1716 if(node->value)
1718 os << _T("<![CDATA[");
1719 os.write(node->value,node->value_size);
1720 os << _T("]]>");
1722 #else
1723 if(node->value) os << _T("<![CDATA[") << node->value << _T("]]>");
1724 #endif
1725 break;
1726 case node_include:
1727 #ifdef PUGOPT_NONSEG
1728 if(node->value)
1730 os << _T("<![INCLUDE[");
1731 os.write(node->value, node->value_size);
1732 os << _T("]]>");
1734 #else
1735 if(node->value) os << _T("<![INCLUDE[") << node->value << _T("]]>");
1736 #endif
1737 break;
1738 case node_comment:
1739 #ifdef PUGOPT_NONSEG
1740 if(node->value)
1742 os << _T("<!--");
1743 os.write(node->value, node->value_size);
1744 os << _T("-->");
1746 #else
1747 if(node->value) os << _T("<!--") << node->value << _T("-->");
1748 #endif
1749 break;
1750 case node_element:
1751 case node_pi:
1752 os << _T("<");
1753 if(node->type==node_pi) os << _T("?");
1754 if(node->name)
1755 #ifdef PUGOPT_NONSEG
1756 os.write(node->name,node->name_size);
1757 #else
1758 os << node->name;
1759 #endif
1760 else os << _T("anonymous");
1761 n = node->attributes;
1762 for(i=0; i<n; ++i)
1764 if(node->attribute[i] && node->attribute[i]->name)
1766 #ifdef PUGOPT_NONSEG
1767 os << _T(" ");
1768 os.write(node->attribute[i]->name,node->attribute[i]->name_size);
1769 if(node->attribute[i]->value)
1771 os << _T("=\"");
1772 os.write(node->attribute[i]->value,node->attribute[i]->value_size);
1773 os << _T("\"");
1775 #else
1776 os << _T(" ") << node->attribute[i]->name;
1777 if(node->attribute[i]->value) os << _T("=\"") << node->attribute[i]->value << _T("\"");
1778 #endif
1781 n = node->children;
1782 if(n && node->type == node_element)
1784 os << _T(">");
1785 if(n == 1 && node->child[0]->type == node_pcdata)
1787 if(node->child[0] && node->child[0]->value)
1788 #ifdef PUGOPT_NONSEG
1789 os.write(node->child[0]->value,node->child[0]->value_size);
1790 #else
1791 os << node->child[0]->value;
1792 #endif
1794 else
1796 if(breaks) os << std::endl;
1797 indent.push();
1798 for(i=0; i<n; ++i) pug::outer_xml(os,indent,node->child[i],breaks);
1799 indent.pop();
1800 os << indent.depth();
1802 os << _T("</");
1803 #ifdef PUGOPT_NONSEG
1804 if(node->name)
1805 os.write(node->name, node->name_size);
1806 #else
1807 if(node->name) os << node->name;
1808 #endif
1809 os << _T(">");
1811 else
1813 if(node->type==node_pi) os << _T("?>");
1814 else os << _T("/>");
1816 break;
1817 default: break;
1819 if(breaks) os << std::endl;
1820 os.flush();
1825 //<summary>Abstract iterator class for interating over a node's members.</summary>
1826 //<remarks>Used as base class for 'xml_node_iterator' and 'xml_attribute_iterator'.</remarks>
1827 template <class _Ty,class _Diff,class _Pointer,class _Reference>
1828 class xml_iterator : public std::_Ranit<_Ty,_Diff,_Pointer,_Reference>
1830 protected:
1831 xml_node_struct* _vref; //A pointer to the node over which to iterate.
1832 long _sscr; //Current subscript of element.
1833 public:
1834 xml_iterator() : _vref(0), _sscr(-1) {} //Default constructor.
1835 xml_iterator(xml_node_struct* vref,long sscr = 0) : _vref(vref), _sscr(sscr){ } //Initializing constructor.
1836 xml_iterator(const xml_iterator& r) : _vref(r._vref), _sscr(r._sscr){ } //Copy constructor.
1837 virtual ~xml_iterator(){} //Destructor.
1838 public:
1839 virtual bool good() = 0; //Internal validity of '_vref'.
1840 virtual bool oob() = 0; //Out of bounds check for '_sscr' with respect to '_vref'. Returns true if '_sscr' is O.O.B.
1841 public:
1842 virtual long subscript(){ return _sscr; } //Get subscript value;
1843 virtual void subscript(long new_subscript){ _sscr = new_subscript; } //Set subscript value;
1844 public:
1845 virtual xml_iterator& operator=(const xml_iterator& rhs){ _vref = rhs._vref; _sscr = rhs._sscr; return *this; } //Assignment.
1846 virtual bool operator==(const xml_iterator& rhs){ return (_sscr == rhs._sscr); } //True if this is equal to RHS.
1847 virtual bool operator!=(const xml_iterator& rhs){ return (_sscr != rhs._sscr); } //True if this is not equal to RHS.
1848 virtual bool operator<(const xml_iterator& rhs){ return (_sscr < rhs._sscr); } //True if this subscript is less than RHS.
1849 virtual bool operator>(const xml_iterator& rhs){ return (_sscr > rhs._sscr); } //True if this subscript is greater than RHS.
1850 virtual bool operator<=(const xml_iterator& rhs){ return (_sscr <= rhs._sscr); } //True if this subscript is less than or equal to RHS.
1851 virtual bool operator>=(const xml_iterator& rhs){ return (_sscr >= rhs._sscr); } //True if this subscript is greater than or equal to RHS.
1852 virtual xml_iterator& operator++(){ _sscr++; return *this; } //Increment the iterator (subscript).
1853 virtual xml_iterator& operator--(){ _sscr--; return *this; } //Decrement the iterator (subscript).
1854 virtual _Ty& operator*() = 0; //Dereference operator.
1855 virtual _Ty* operator->() = 0;
class xml_node; //Declared here only; the walker callbacks just take references to it.


//<summary>Abstract tree walker class for xml_node::traverse().</summary>
//<remarks>
//	Keeps a depth counter that is adjusted through push()/pop(), and offers
//	three callbacks. Only 'for_each' has to be implemented by a subclass.
//</remarks>
class xml_tree_walker
{
protected:
	long _deep; //How many levels down the walk currently is.
public:
	//Start at depth zero.
	xml_tree_walker() : _deep(0) {}
	//Virtual so that subclasses are destroyed correctly through this type.
	virtual ~xml_tree_walker(){}
public:
	//Go one level deeper.
	virtual void push(){ _deep += 1; }
	//Come one level back up.
	virtual void pop(){ _deep -= 1; }
	//Report the depth; a counter at or below zero is reported as 0.
	virtual long depth()
	{
		if(_deep <= 0) return 0;
		return _deep;
	}
public:
	//<summary>Callback when traverse on a given root node begins.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	//<remarks>The default accepts; override to implement your own custom behavior.</remarks>
	virtual bool begin(xml_node&){ return true; }
	//<summary>Callback for each node that is hit on traverse.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	virtual bool for_each(xml_node&) = 0;
	//<summary>Callback when traverse on a given root node ends.</summary>
	//<returns>Returning false will abort the traversal.</returns>
	//<remarks>The default accepts; override to implement your own custom behavior.</remarks>
	virtual bool end(xml_node&){ return true; }
};
1888 //<summary>Provides a light-weight wrapper for manipulating xml_attribute_struct structures.</summary>
1889 //<remarks>
1890 // Note: xml_attribute does not create any memory for the attribute it wraps;
1891 // it only wraps a pointer to an existing xml_attribute_struct.
1892 //</remarks>
1893 class xml_attribute
1895 //Internal Data Members
1896 protected:
1897 xml_attribute_struct* _attr; //The internal attribute pointer.
1898 //Construction/Destruction
1899 public:
1900 xml_attribute() : _attr(NULL) {} //Default constructor.
1901 xml_attribute(xml_attribute_struct* attr) : _attr(attr) {} //Initializing constructor.
1902 xml_attribute(const xml_attribute& r) : _attr(r._attr) {} //Copy constructor.
1903 virtual ~xml_attribute(){} //Destructor.
1904 //Operators
1905 public:
1906 void attach(xml_attribute_struct* v){ _attr = v; }
1907 xml_attribute& operator=(const xml_attribute& r){ _attr = r._attr; return *this; } //Assign internal pointer.
1908 bool operator==(const xml_attribute& r){ return (_attr == r._attr); } //Compare internal pointer.
1909 bool operator!=(const xml_attribute& r){ return (_attr != r._attr); }
1910 operator xml_attribute_struct*(){ return _attr; }
1911 //<summary>Cast attribute value as std::string. If not found, return empty.</summary>
1912 //<returns>The std::string attribute value, or empty.</returns>
1913 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1914 operator std::string()
1916 std::string temp;
1917 if(!empty() && has_value())
1919 #ifdef PUGOPT_NONSEG
1920 temp.append(_attr->value,_attr->value_size);
1921 #else
1922 temp = _attr->value;
1923 #endif
1925 return temp;
1927 //<summary>Cast attribute value as integral character string. If not found, return NULL.</summary>
1928 //<returns>Integral character string attribute value, or NULL.</returns>
1929 //<remarks>Warning: Modifying this may corrupt portions of the document tree.</remarks>
1930 operator const TCHAR*()
1932 if(empty() || !has_value()) return NULL;
1933 return _attr->value;
1935 //<summary>Cast attribute value as long. If not found, return 0.</summary>
1936 //<returns>Attribute value as long, or 0.</returns>
1937 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1938 operator long()
1940 if(empty() || !has_value()) return 0;
1941 #ifdef PUGOPT_NONSEG
1942 TCHAR temp[PUGDEF_ATTR_VALU_SIZE];
1943 unsigned int valulen = sizeof(temp)-1;
1944 const unsigned int maxlen = valulen ? min(valulen,_attr->value_size) : _attr->value_size;
1945 _tcsncpy(temp,_attr->value,maxlen);
1946 temp[maxlen] = 0;
1947 return _tcstol(temp,NULL,10);
1948 #else
1949 return _tcstol(_attr->value,NULL,10);
1950 #endif
1952 //<summary>Cast attribute value as double. If not found, return 0.0.</summary>
1953 //<returns>Attribute value as double, or 0.0.</returns>
1954 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1955 operator double()
1957 if(empty() || !has_value()) return 0.0;
1958 #ifdef PUGOPT_NONSEG
1959 TCHAR temp[PUGDEF_ATTR_VALU_SIZE];
1960 unsigned int valulen = sizeof(temp)-1;
1961 const unsigned int maxlen = valulen ? min(valulen,_attr->value_size) : _attr->value_size;
1962 _tcsncpy(temp,_attr->value,maxlen);
1963 temp[maxlen] = 0;
1964 return _tcstod(temp,0);
1965 #else
1966 return _tcstod(_attr->value,0);
1967 #endif
1969 //<summary>Cast attribute value as bool. If not found, return false.</summary>
1970 //<returns>Attribute value as bool, or false.</returns>
1971 //<remarks>Note: Modifying this will not change the value, e.g. read only.</remarks>
1972 operator bool()
1974 if(empty() || !has_value()) return false;
1975 if(*(_attr->value))
1977 return //Only look at first char:
1979 *(_attr->value) == _T('1') || //1*
1980 *(_attr->value) == _T('t') || //t* (true)
1981 *(_attr->value) == _T('T') || //T* (True|true)
1982 *(_attr->value) == _T('y') || //y* (yes)
1983 *(_attr->value) == _T('Y') //Y* (Yes|YES)
1985 ? true : false; //Return true if matches above, else false.
1988 //<summary>Set attribute to std::string.</summary>
1989 //<param name="rhs">Value std::string to set.</param>
1990 //<returns>Reference to xml_attribute.</returns>
1991 xml_attribute& operator=(const std::string& rhs){ value(rhs.c_str()); return *this; }
1992 //<summary>Set attribute to string.</summary>
1993 //<param name="rhs">Value string to set.</param>
1994 //<returns>Reference to xml_attribute.</returns>
1995 xml_attribute& operator=(const TCHAR* rhs){ if(rhs) value(rhs); return *this; }
1996 //<summary>Set attribute to long.</summary>
1997 //<param name="rhs">Value long to set.</param>
1998 //<returns>Reference to xml_attribute.</returns>
1999 xml_attribute& operator=(long rhs)
2001 TCHAR temp[32] = {0};
2002 _stprintf(temp,_T("%ld"),rhs);
2003 value(temp);
2004 return *this;
2006 //<summary>Set attribute to double.</summary>
2007 //<param name="rhs">Value double to set.</param>
2008 //<returns>Reference to xml_attribute.</returns>
2009 xml_attribute& operator=(double rhs)
2011 TCHAR temp[32] = {0};
2012 _stprintf(temp,_T("%lf"),rhs);
2013 value(temp);
2014 return *this;
2016 //<summary>Set attribute to bool.</summary>
2017 //<param name="rhs">Value bool to set.</param>
2018 //<returns>Reference to xml_attribute.</returns>
2019 xml_attribute& operator=(bool rhs)
2021 value(rhs?_T("true"):_T("false"));
2022 return *this;
2024 //<summary>Right-shift attribute value to std::string.</summary>
2025 //<param name="rhs">Reference to std::string to set.</param>
2026 //<returns>Reference to xml_attribute.</returns>
2027 xml_attribute& operator>>(std::string& rhs)
2029 #ifdef PUGOPT_NONSEG
2030 rhs.clear();
2031 rhs.append(_attr->value,_attr->value_size);
2032 #else
2033 rhs = value();
2034 #endif
2035 return *this;
2037 //<summary>Right-shift attribute value to long.</summary>
2038 //<param name="rhs">Reference to long to set.</param>
2039 //<returns>Reference to xml_attribute.</returns>
2040 xml_attribute& operator>>(long& rhs){ rhs = (long)*this; return *this; }
2041 //<summary>Right-shift attribute value to double.</summary>
2042 //<param name="rhs">Reference to double to set.</param>
2043 //<returns>Reference to xml_attribute.</returns>
2044 xml_attribute& operator>>(double& rhs){ rhs = (double)*this; return *this; }
2045 //<summary>Right-shift attribute value to bool.</summary>
2046 //<param name="rhs">Reference to bool to set.</param>
2047 //<returns>Reference to xml_attribute.</returns>
2048 xml_attribute& operator>>(bool& rhs){ rhs = (bool)*this; return *this; }
2049 //<summary>Left-shift attribute value to long.</summary>
2050 //<param name="lhs">Reference to long to set.</param>
2051 //<param name="rhs">Reference to xml_attribute to read.</param>
2052 //<returns>Reference to long.</returns>
2053 friend long& operator<<(long& lhs,xml_attribute& rhs){ lhs = (long)rhs; return lhs; }
2054 //<summary>Left-shift attribute value to double.</summary>
2055 //<param name="lhs">Reference to double to set.</param>
2056 //<param name="rhs">Reference to xml_attribute to read.</param>
2057 //<returns>Reference to double.</returns>
2058 friend double& operator<<(double& lhs,xml_attribute& rhs){ lhs = (double)rhs; return lhs; }
2059 //<summary>Left-shift attribute value to bool.</summary>
2060 //<param name="lhs">Reference to bool to set.</param>
2061 //<param name="rhs">Reference to xml_attribute to read.</param>
2062 //<returns>Reference to bool.</returns>
2063 friend bool& operator<<(bool& lhs,xml_attribute& rhs){ lhs = (bool)rhs; return lhs; }
2064 //<summary>Left-shift long to attribute value.</summary>
2065 //<param name="lhs">Reference to xml_attribute to set.</param>
2066 //<param name="rhs">Reference to long to read.</param>
2067 //<returns>Reference to xml_attribute.</returns>
2068 friend xml_attribute& operator<<(xml_attribute& lhs,const long rhs){ lhs = rhs; return lhs; }
2069 //<summary>Left-shift double to attribute value.</summary>
2070 //<param name="lhs">Reference to xml_attribute to set.</param>
2071 //<param name="rhs">Reference to double to read.</param>
2072 //<returns>Reference to xml_attribute.</returns>
2073 friend xml_attribute& operator<<(xml_attribute& lhs,const double& rhs){ lhs = rhs; return lhs; }
2074 //<summary>Left-shift bool to attribute value.</summary>
2075 //<param name="lhs">Reference to xml_attribute to set.</param>
2076 //<param name="rhs">Reference to bool to read.</param>
2077 //<returns>Reference to xml_attribute.</returns>
2078 friend xml_attribute& operator<<(xml_attribute& lhs,const bool& rhs){ lhs = rhs; return lhs; }
2079 public:
2080 bool empty(){ return (_attr == NULL); } //True if the internal xml_attribute_struct pointer is NULL.
2081 bool has_name(){ return (!empty() && _attr->name); } //True if the attribute has a name.
2082 bool has_value(){ return (!empty() && _attr->value); } //True if the attribute has a value.
2083 #ifdef PUGOPT_NONSEG
2084 bool has_name(const TCHAR* name) { return (name && !empty() && has_name() && _tcsncmp(_attr->name,name,_attr->name_size)==0); } //Is named 'name'.
2085 bool has_value(const TCHAR* value) { return (value && !empty() && has_value() && _tcsncmp(_attr->value,value,_attr->value_size)==0); } //Has value 'value'.
2086 #else
2087 bool has_name(const TCHAR* name) { return (name && !empty() && has_name() && _tcscmp(_attr->name,name)==0); } //Is named 'name'.
2088 bool has_value(const TCHAR* value) { return (value && !empty() && has_value() && _tcscmp(_attr->value,value)==0); } //Has value 'value'.
2089 #endif
2090 public:
2091 const TCHAR* name(){ return (!empty() && _attr->name) ? _attr->name : _T(""); } //Access the attribute name.
2092 #ifdef PUGOPT_NONSEG
2093 const unsigned int name_size(){ return (!empty()) ? _attr->name_size : 0; } //Access the attribute name length (for PUGOPT_NONSEG).
2094 #endif
2095 bool name(TCHAR* new_name) //Set the attribute name.
2097 if(!empty() && new_name)
2098 #ifdef PUGOPT_NONSEG
2099 return strcpyinsitu(&_attr->name,new_name,&_attr->name_insitu,_attr->name_size);
2100 #else
2101 return strcpyinsitu(&_attr->name,new_name,&_attr->name_insitu);
2102 #endif
2103 return false;
2105 const TCHAR* value(){ return (!empty()) ? _attr->value : _T(""); } //Access the attribute value.
#ifdef PUGOPT_NONSEG
//Access the attribute value length (for PUGOPT_NONSEG); 0 if there is no attribute.
const unsigned int value_size()
{
    if(empty()) return 0;
    return _attr->value_size;
}
#endif
2109 bool value(const TCHAR* new_value) //Set the attribute value.
2111 if(!empty() && new_value)
2112 #ifdef PUGOPT_NONSEG
2113 return strcpyinsitu(&_attr->value,new_value,&_attr->value_insitu,_attr->value_size);
2114 #else
2115 return strcpyinsitu(&_attr->value,new_value,&_attr->value_insitu);
2116 #endif
2117 return false;
2122 class xml_node; //Forward declaration.
//<summary>Forward wrapper for any as-yet undefined class.</summary>
//<remarks>
//  Used by xml_node_iterator, and xml_attribute_iterator to assist with
//  operator->(), and operator*() mapping to xml_node and xml_attribute
//  types. The wrapper owns one heap-allocated TYPE. Copying or assigning a
//  wrapper copies the wrapped value, so two wrappers never share (and later
//  both delete) the same object.
//</remarks>
template <typename TYPE> class forward_class
{
protected:
    TYPE* _obj; //The class, internal. Always points to an object owned by this wrapper.
public:
    forward_class() : _obj(new TYPE()) {} //Default constructor.
    forward_class(const TYPE& r) : _obj(new TYPE(r)) {} //Construct from a value of the wrapped type.
    forward_class(const forward_class& r) : _obj(new TYPE(*r._obj)) {} //Copy constructor; clones the wrapped object.
    forward_class& operator=(const forward_class& r) //Copy assignment; assigns the wrapped value, keeps own storage.
    {
        if(this != &r) *_obj = *r._obj;
        return *this;
    }
    virtual ~forward_class(){ delete _obj; } //Destructor.
public:
    TYPE& operator* (){ return *_obj; } //Dereference to the class.
    TYPE* operator->(){ return _obj; } //Class member selection.
    operator TYPE (){ return *_obj; } //Cast as class type.
    operator TYPE&(){ return *_obj; } //Cast as class type reference.
    operator TYPE*(){ return _obj; } //Cast as class type pointer.
};
2148 //<summary>Provides a light-weight wrapper for manipulating xml_node_struct structures.</summary>
2149 class xml_node
2151 //Internal Data Members
2152 protected:
2154 xml_node_struct* _root; //Pointer to node root.
2155 xml_node_struct _dummy; //Utility.
2157 //Construction/Destruction
2158 public:
2160 //<summary>Default constructor.</summary>
2161 //<remarks>
2162 // Node root points to a dummy 'xml_node_struct' structure. Test for this
2163 // with 'empty'.
2164 //</remarks>
2165 xml_node(): _root(0)
2167 memset(&_dummy,0,sizeof(xml_node_struct));
2168 _dummy.type = node_null;
2169 _dummy.parent = &_dummy;
2170 _root = &_dummy;
2173 //<summary>Construct, wrapping the given 'xml_node_struct' pointer.</summary>
2174 //<param name="p">Pointer to node to wrap.</param>
2175 //<remarks>It is possible that 'p' is NULL, so test for this with 'empty'.</remarks>
2176 xml_node(xml_node_struct* p): _root(p) { memset(&_dummy,0,sizeof(xml_node_struct)); }
2178 //<summary>Copy constructor.</summary>
2179 //<param name="r">Reference to node.</param>
2180 //<remarks>
2181 // Only the root pointer is assigned, so both classes now in fact point
2182 // to the same structure.
2183 //</remarks>
2184 xml_node(const xml_node& r): _root(r._root) {}
2186 //<summary>Destructor.</summary>
2187 virtual ~xml_node(){}
2189 //<summary>Attach to the given structure.</summary>
2190 //<param name="p">Pointer to node structure to wrap.</param>
2191 //<returns>Pointer to previous node structure.</returns>
2192 xml_node_struct* attach(xml_node_struct* p)
2194 xml_node_struct* prev = _root;
2195 _root = p;
2196 return prev;
2199 //Iteration
2200 public:
2202 //<summary>Child node iterator.</summary>
2203 class xml_node_iterator : public xml_iterator<xml_node,long,xml_node*,xml_node&>
2205 protected:
2206 forward_class<xml_node> _wrap; //Wrapper for xml_node.
2207 public:
2208 xml_node_iterator() : _wrap(), xml_iterator<xml_node,long,xml_node*,xml_node&>() {} //Default constructor.
2209 xml_node_iterator(xml_node_struct* vref,long sscr = 0) : _wrap(), xml_iterator<xml_node,long,xml_node*,xml_node&>(vref,sscr) { } //Initializing constructor.
2210 xml_node_iterator(const xml_node_iterator& r) : _wrap(), xml_iterator<xml_node,long,xml_node*,xml_node&>(r) { } //Copy constructor.
2211 virtual bool good() //Internal validity.
2215 _vref != 0 && //Pointing to some node.
2216 _vref->child != 0 && //The node has an array of children.
2217 _vref->children > 0 //There are 1 or more children in the array.
2219 return true;
2220 return false;
2222 virtual bool oob() //Out of bounds check.
2226 !good() || //There is no data over which to iterate.
2227 _sscr < 0 || //Subscript is out of range.
2228 _sscr >= (long)_vref->children
2230 return true;
2231 return false;
2233 //<summary>Pointer dereference for current xml_node.<summary>
2234 //<returns>
2235 // Reference to the internal xml_node object, which wraps the
2236 // xml_node_struct corresponding to the node at the
2237 // current subscript.
2238 //</returns>
2239 virtual xml_node& operator*()
2241 if(!oob()) _wrap->attach(_vref->child[_sscr]);
2242 else _wrap->attach(NULL);
2243 return (xml_node&)_wrap;
2245 virtual xml_node* operator->() //Member selection for current xml_node.
2247 if(!oob()) _wrap->attach(_vref->child[_sscr]);
2248 else _wrap->attach(NULL);
2249 return (xml_node*)_wrap;
2253 //<summary>Attribute iterator.</summary>
2254 class xml_attribute_iterator : public xml_iterator<xml_attribute,long,xml_attribute*,xml_attribute&>
2256 protected:
2257 forward_class<xml_attribute> _wrap;
2258 public:
2259 xml_attribute_iterator() : _wrap(), xml_iterator<xml_attribute,long,xml_attribute*,xml_attribute&>() {} //Default constructor.
2260 xml_attribute_iterator(xml_node_struct* vref,long sscr = 0) : _wrap(), xml_iterator<xml_attribute,long,xml_attribute*,xml_attribute&>(vref,sscr) { } //Initializing constructor.
2261 xml_attribute_iterator(const xml_attribute_iterator& r) : _wrap(), xml_iterator<xml_attribute,long,xml_attribute*,xml_attribute&>(r) { } //Copy constructor.
2262 virtual bool good() //Internal validity check.
2266 _vref != 0 && //Pointing to some node.
2267 _vref->attribute != 0 && //The node has an array of children.
2268 _vref->attributes > 0 //There are 1 or more children in the array.
2270 return true;
2271 return false;
2273 virtual bool oob() //Out of bounds check.
2277 !good() || //There is no data over which to iterate.
2278 _sscr < 0 || //Subscript is out of range.
2279 _sscr >= (long)_vref->attributes //For 'end'
2281 return true;
2282 return false;
2284 //<summary>Pointer dereference for current xml_attribute.</summary>
2285 //<returns>
2286 // Reference to the internal xml_attribute object, which wraps the
2287 // xml_attribute_struct corresponding to the attribute at the
2288 // current subscript.
2289 //</returns>
2290 virtual xml_attribute& operator*()
2292 if(!oob()) _wrap->attach(_vref->attribute[_sscr]);
2293 else _wrap->attach(NULL);
2294 return (xml_attribute&)_wrap;
2296 //<summary>Member selection for current xml_attribute.</summary>
2297 //<returns></returns>
2298 virtual xml_attribute* operator->()
2300 if(!oob()) _wrap->attach(_vref->attribute[_sscr]);
2301 else _wrap->attach(NULL);
2302 return (xml_attribute*)_wrap;
2306 //<summary>Base iterator type (for child nodes). Same as 'child_iterator'.</summary>
2307 typedef xml_node_iterator iterator;
2308 //<summary>Base iterator type (for child nodes). Same as 'iterator'.</summary>
2309 typedef xml_node_iterator child_iterator;
2310 //<summary>Base iterator type (for sibling nodes). Same as 'iterator'.</summary>
2311 typedef xml_node_iterator sibling_iterator;
2312 //<summary>Attribute iterator type.</summary>
2313 typedef xml_attribute_iterator attribute_iterator;
2315 //<summary>Access the begin iterator for this node's collection of child nodes.</summary>
2316 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2317 //<remarks>Same as 'children_begin'.</remarks>
2318 iterator begin(){ return iterator(_root,0); }
2319 //<summary>Access the end iterator for this node's collection of child nodes.</summary>
2320 //<returns>The end iterator for this node's collection of child nodes.</returns>
2321 //<remarks>Same as 'children_end'.</remarks>
2322 iterator end(){ return iterator(_root,_root->children); }
2323 //<summary>Erase the given node from node's collection of child nodes.</summary>
2324 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2325 //<remarks>Same as 'children_erase'.</remarks>
2326 iterator erase(iterator where){ remove_child((unsigned int)where.subscript()); return iterator(_root,0); }
2328 //<summary>Access the begin iterator for this node's collection of child nodes.</summary>
2329 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2330 //<remarks>Same as 'begin'.</remarks>
2331 child_iterator children_begin(){ return child_iterator(_root,0); }
2332 //<summary>Access the end iterator for this node's collection of child nodes.</summary>
2333 //<returns>The end iterator for this node's collection of child nodes.</returns>
2334 //<remarks>Same as 'end'.</remarks>
2335 child_iterator children_end(){ return child_iterator(_root,_root->children); }
2336 //<summary>Erase the given node from node's collection of child nodes.</summary>
2337 //<returns>The begin iterator for this node's collection of child nodes.</returns>
2338 //<remarks>Same as 'erase'.</remarks>
2339 child_iterator children_erase(child_iterator where){ remove_child((unsigned int)where.subscript()); return child_iterator(_root,0); }
2341 //<summary>Access the begin iterator for this node's collection of attributes.</summary>
2342 //<returns>The begin iterator for this node's collection of attributes.</returns>
2343 attribute_iterator attributes_begin(){ return attribute_iterator(_root,0); }
2344 //<summary>Access the end iterator for this node's collection of attributes.</summary>
2345 //<returns>The end iterator for this node's collection of attributes.</returns>
2346 attribute_iterator attributes_end(){ return attribute_iterator(_root,_root->attributes); }
2347 //<summary>Erase the given attribute from node's collection of attributes.</summary>
2348 //<returns>The begin iterator for this node's collection of attributes.</returns>
2349 attribute_iterator attributes_erase(attribute_iterator where){ remove_attribute((unsigned int)where.subscript()); return attribute_iterator(_root,0); }
2351 //<summary>Access the begin iterator for this node's collection of siblings.</summary>
2352 //<returns>The begin iterator for this node's collection of siblings.</returns>
2353 sibling_iterator siblings_begin(){ if(!empty()) return sibling_iterator(_root->parent,0); return sibling_iterator(); }
2354 //<summary>Access the end iterator for this node's collection of siblings.</summary>
2355 //<returns>The end iterator for this node's collection of siblings.</returns>
2356 sibling_iterator siblings_end(){ if(!empty()) return sibling_iterator(_root->parent,_root->parent->children); return sibling_iterator(); }
2357 //<summary>Erase the given sibling from node's collection of siblings.</summary>
2358 //<returns>The begin iterator for this node's collection of siblings.</returns>
2359 sibling_iterator siblings_erase(sibling_iterator where){ parent().remove_child((unsigned int)where.subscript()); return iterator(_root->parent,0); }
2361 //Overloaded Operators
2362 public:
2364 operator xml_node_struct*(){ return _root; } //Cast as xml_node_struct pointer.
2365 operator void*(){ return (void*)_root; } //Cast root as void*.
2366 xml_node& operator=(const xml_node& r){ _root = r._root; return *this; } //Assign to xml_node_struct pointer.
2367 bool operator==(const xml_node& r){ return (_root == r._root); } //True if this has the same internal xml_node_struct pointer value.
2368 xml_node operator[](unsigned int i){ return child(i); } //Access the child at subscript.
2370 //Node Classification
2371 public:
2373 bool empty() { return (_root == 0 || _root->type == node_null); } //Node pointer is null, or type is node_null. Same as type_null.
2374 bool type_null() { return empty(); } //Node pointer is null, or type is node_null. Same as empty.
2375 bool type_document() { return (_root && _root == _root->parent && _root->type == node_document); } //Node is tree root.
2376 bool type_element() { return (_root && _root->type == node_element); } //Node is an element.
2377 bool type_comment() { return (_root && _root->type == node_comment); } //Node is a comment.
2378 bool type_pcdata() { return (_root && _root->type == node_pcdata); } //Node is PCDATA.
2379 bool type_cdata() { return (_root && _root->type == node_cdata); } //Node is CDATA.
2380 bool type_include() { return (_root && _root->type == node_include); } //Node is INCLUDE.
2381 bool type_pi() { return (_root && _root->type == node_pi); } //Node is a processing instruction.
2382 bool type_doctype() { return (_root && _root->type == node_doctype); } //Node is DOCTYPE.
2383 bool type_dtd_item() { return (_root && _root->type > node_doctype); } //Node is NODE_DTD_*.
2384 bool type_dtd_attlist() { return (_root && _root->type == node_dtd_attlist); } //Node is node_dtd_attlist.
2385 bool type_dtd_element() { return (_root && _root->type == node_dtd_element); } //Node is node_dtd_element.
2386 bool type_dtd_entity() { return (_root && _root->type == node_dtd_entity); } //Node is node_dtd_entity.
2387 bool type_dtd_notation() { return (_root && _root->type == node_dtd_notation); } //Node is node_dtd_notation.
2389 //Member Inventory
2390 public:
2392 bool has_value() { return (!empty() && _root->value != 0); } //Node has data (comment, CDATA or PCDATA).
2393 bool has_child_nodes() { return (!empty() && children() > 0); } //Node has 1 or more children.
2394 bool has_attributes() { return (!empty() && attributes() > 0); } //Node has 1 or more attributes.
2395 bool has_siblings() { return (!empty() && siblings() > 0); } //Node has one or more siblings.
2396 bool has_name() { return (!empty() && _root->name != 0); } //Node has a name.
2397 bool has_name(const std::string& name) const { return has_name(name.c_str()); } //Node is named 'name'.
2398 bool has_attribute(const std::string& name) { return has_attribute(name.c_str()); } //Node has an attribute named 'name'.
2399 #ifdef PUGOPT_NONSEG
2400 bool has_name(const TCHAR* name) const { return (name && _root && _root->name && _tcsncmp(_root->name,name,_root->name_size)==0); } //Node is named 'name'.
2401 #else
2402 bool has_name(const TCHAR* name) const { return (name && _root && _root->name && strcmpwild(name,_root->name)==0); } //Node is named 'name'.
2403 #endif
2404 bool has_attribute(const TCHAR* name){ return (mapto_attribute_idx(name) > -1); } //Node has an attribute named name.
2406 //Member Accessors
2407 public:
2409 #ifdef PUGOPT_NONSEG
2411 //<summary>Access node name if any.</summary>
2412 //<returns>Name, or dummy value if the no name.</returns>
2413 //<remarks>Only returns up to 'PUGDEF_ELEM_NAME_SIZE' chars of name.</remarks>
2414 const TCHAR* name()
2416 static TCHAR temp[PUGDEF_ELEM_NAME_SIZE] = {0};
2417 if(has_name())
2419 _tcsncpy(temp,_root->name,_root->name_size);
2420 temp[_root->name_size<PUGDEF_ELEM_NAME_SIZE?_root->name_size:(PUGDEF_ELEM_NAME_SIZE-1)] = 0;
2421 return temp;
2423 return _T("");
2425 unsigned int name_size(){ return (has_name()) ? _root->name_size : 0; } //Get node name length if any, else 0.
2426 unsigned int value_size(){ return (has_value()) ? _root->value_size : 0; } //Get node value length if any, else 0.
2427 inline bool matches_attribute_name(const TCHAR* name,const unsigned int namelen,const int i) const { return (_tcsncmp(name,_root->attribute[i]->name,max(namelen,_root->attribute[i]->name_size))==0); } //There is an attribute at 'i' named 'name'.
2428 inline bool matches_child_name(const TCHAR* name,const unsigned int namelen,const int i) const { return (_tcsncmp(name,_root->child[i]->name,max(namelen,_root->child[i]->name_size))==0); } //There is a child at 'i' named 'name'.
2429 inline bool matches_name(const TCHAR* name,const unsigned int namelen,xml_node_struct* node) const { return (_tcsncmp(name,node->name,max(namelen,node->name_size))==0); } //This is named 'name'.
2430 inline bool matches_value(const TCHAR* data,const unsigned int datalen,xml_node_struct* node) const { return (_tcsncmp(data,node->value,max(datalen,node->value_size))==0); } //This is valued 'value'.
2431 inline bool matches_attribute_name(const TCHAR* name,const unsigned int namelen,xml_attribute_struct* attr) const { return (_tcsncmp(name,attr->name,max(namelen,attr->name_size))==0); } //The given attribute is named 'name'.
2432 inline bool matches_attribute_name_value(const TCHAR* value,const unsigned int valulen,xml_attribute_struct* attr) const { return (_tcsncmp(value,attr->value,max(valulen,attr->value_size))==0); } //The given attribute is valued 'value'.
2433 #else
2434 const TCHAR* name(){ return (has_name()) ? _root->name : _T(""); } //Access pointer to node name if any, else empty string.
2435 inline bool matches_attribute_name(const TCHAR* name,const unsigned int i) const { return (strcmpwild(name,_root->attribute[i]->name)==0); } //There is an attribute at 'i' named 'name'.
2436 inline bool matches_child_name(const TCHAR* name,const unsigned int i) const { return (strcmpwild(name,_root->child[i]->name)==0); } //There is a child at 'i' named 'name'.
2437 inline bool matches_name(const TCHAR* name,xml_node_struct* node) const { return (strcmpwild(name,node->name)==0); } //This is named 'name'.
2438 inline bool matches_value(const TCHAR* data,xml_node_struct* node) const { return (strcmpwild(data,node->value)==0); } //This is valued 'value'.
2439 inline bool matches_attribute_name(const TCHAR* attribute,xml_attribute_struct* attr) const { return (strcmpwild(attribute,attr->name)==0); } //The given attribute is named 'name'.
2440 inline bool matches_attribute_name_value(const TCHAR* value,xml_attribute_struct* attr) const { return (strcmpwild(value,attr->value)==0); } //The given attribute is valued 'value'.
2441 #endif
2442 xml_node_type type() const { return (_root) ? (xml_node_type)_root->type : node_null; } //Access node entity type.
2443 const TCHAR* value() { return (has_value()) ? _root->value : _T(""); } //Access pointer to data if any, else empty string.
2444 unsigned int children() const { return _root->children; } //Access node's child count.
2445 xml_node child(unsigned int i){ return (i < children()) ? xml_node(_root->child[i]) : xml_node(); } //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
2446 unsigned int attributes() const { return _root->attributes; } //Access node's attribute count.
2447 xml_attribute attribute(unsigned int i){ return (i < attributes()) ? xml_attribute(_root->attribute[i]) : xml_attribute(); } //Access attribute at subscript if any, else empty attribute.
2448 //<summary>Access or create the attribute having 'name'.</summary>
2449 //<param name="name">Name of attribute to access/create.</param>
2450 //<returns>Reference to xml_attribute wrapper.</returns>
2451 xml_attribute attribute(const std::string& name){ return attribute(name.c_str()); }
2452 //<summary>Access or create the attribute having 'name'.</summary>
2453 //<param name="name">Name of attribute to access/create.</param>
2454 //<returns>Reference to xml_attribute wrapper.</returns>
2455 xml_attribute attribute(const TCHAR* name)
2457 xml_attribute_struct* attr = mapto_attribute_ptr(name);
2458 if(!attr) attr = append_attribute(name,_T(""));
2459 return xml_attribute(attr);
2461 const unsigned int siblings(){ return (!type_document()) ? _root->parent->children : 0; } //Access node's sibling count (parent's child count).
2462 xml_node sibling(unsigned int i){ return (!type_document() && i < siblings()) ? xml_node(_root->parent->child[i]) : xml_node(); } //Access sibling node at subscript as xml_node or xml_node(NULL) if bad subscript.
2463 xml_node parent(){ return (!type_document()) ? xml_node(_root->parent) : xml_node(); } //Access node's parent if any, else xml_node(NULL)
2465 //<summary>Return the first child that has data's data. If none, return NULL.</summary>
2466 //<param name="value">Returns a copy of the data.</param>
2467 //<param name="valuelen">Specifies the maximum number of characters to copy into value.</param>
2468 //<returns>Pointer to value if exists, else NULL.</returns>
2469 //<remarks>
2470 // Used to get the PCDATA for the current element. This handles elements
2471 // like: &lt;LINE&gt;&lt;STAGEDIR&gt;Aside&lt;/STAGEDIR&gt;Thy father,
2472 // Pompey, would ne'er have&lt;/LINE&gt;, where 'this' points to &lt;LINE&gt;.
2473 //</remarks>
2474 TCHAR* child_value(TCHAR* value,const unsigned int valuelen)const
2476 if(_root->children)
2478 for(register unsigned int i=0; i < _root->children; ++i)
2480 xml_node_struct* node = _root->child[i];
2481 if(node->value)
2483 const unsigned int n =
2484 #ifdef PUGOPT_NONSEG
2485 (std::min)(valuelen,node->value_size);
2486 #else
2487 (std::min)(valuelen,unsigned(_tcslen(node->value)));
2488 #endif
2489 _tcsncpy(value,node->value,n);
2490 value[n] = 0;
2491 break;
2494 return value;
2496 return NULL;
2499 //Name-To-Object Mapping
2500 public:
2502 //<summary>Map an attribute name to a pointer to that attribute, if found.</summary>
2503 //<param name="name">Reference to name of attribute to find.</param>
2504 //<returns>Pointer to attribute, or NULL if not found.</returns>
2505 //<remarks>Implement your own hash table if you have a great many attributes.</remarks>
2506 xml_attribute_struct* mapto_attribute_ptr(const std::string& name){ return mapto_attribute_ptr(name.c_str()); }
2508 //<summary>Map an attribute name to a pointer to that attribute, if found.</summary>
2509 //<param name="name">Pointer to name of attribute to find.</param>
2510 //<returns>Pointer to attribute, or NULL if not found.</returns>
2511 //<remarks>Implement your own hash table if you have a great many attributes.</remarks>
2512 xml_attribute_struct* mapto_attribute_ptr(const TCHAR* name)
2514 if(!_root || !name) return NULL;
2515 register unsigned int n = _root->attributes;
2516 #ifdef PUGOPT_NONSEG
2517 const int namelen = _tcslen(name);
2518 #endif
2519 for(register unsigned int i=0; i<n; ++i)
2520 #ifdef PUGOPT_NONSEG
2521 if(matches_attribute_name(name,namelen,i))
2522 #else
2523 if(matches_attribute_name(name,i))
2524 #endif
2525 return _root->attribute[i];
2526 return NULL;
2529 //<summary>Map an attribute name to the index of that attribute, if found.</summary>
2530 //<param name="name">Pointer to name of attribute to find.</param>
2531 //<returns>Index of attribute, or -1 if not found.</returns>
2532 //<remarks>Implement your own hash table if you have a great many attributes.</remarks>
2533 int mapto_attribute_idx(const TCHAR* name)
2535 if(!_root || !name) return -1;
2536 register unsigned int n = _root->attributes;
2537 #ifdef PUGOPT_NONSEG
2538 const int namelen = _tcslen(name);
2539 #endif
2540 for(register unsigned int i=0; i<n; ++i)
2541 #ifdef PUGOPT_NONSEG
2542 if(matches_attribute_name(name,namelen,i))
2543 #else
2544 if(matches_attribute_name(name,i))
2545 #endif
2546 return i;
2547 return -1;
2550 //<summary>Map a child name to a pointer to the first instance, if found.</summary>
2551 //<param name="name">Reference to name of child to find.</param>
2552 //<returns>Index of child, or -1 if not found.</returns>
2553 //<remarks>Implement your own hash table if you have a great many children.</remarks>
2554 xml_node_struct* mapto_child_ptr(const std::string& name){ return mapto_child_ptr(name.c_str()); }
2556 //<summary>Map a child name to a pointer to the first instance, if found.</summary>
2557 //<param name="name">Pointer to name of child to find.</param>
2558 //<returns>Index of child, or -1 if not found.</returns>
2559 //<remarks>Implement your own hash table if you have a great many children.</remarks>
2560 xml_node_struct* mapto_child_ptr(const TCHAR* name)
2562 if(!_root || !name) return NULL;
2563 register unsigned int n = _root->children;
2564 #ifdef PUGOPT_NONSEG
2565 const int namelen = _tcslen(name);
2566 #endif
2567 for(register unsigned int i=0; i<n; ++i)
2571 _root->child[i]->name &&
2572 #ifdef PUGOPT_NONSEG
2573 matches_child_name(name,namelen,i)
2574 #else
2575 matches_child_name(name,i)
2576 #endif
2578 return _root->child[i];
2580 return NULL;
2583 //<summary>Map a child name to the index of the first instance, if found.</summary>
2584 //<param name="name">Reference to name of child to find.</param>
2585 //<returns>Index of child, or -1 if not found.</returns>
2586 //<remarks>Implement your own hash table if you have a great many children.</remarks>
2587 int mapto_child_idx(const std::string& name){ return mapto_child_idx(name.c_str()); }
2589 //<summary>Map a child name to the index of the first instance, if found.</summary>
2590 //<param name="name">Pointer to name of child to find.</param>
2591 //<returns>Index of child, or -1 if not found.</returns>
2592 //<remarks>Implement your own hash table if you have a great many children.</remarks>
2593 int mapto_child_idx(const TCHAR* name)
2595 if(!_root || !name) return -1;
2596 register unsigned int n = _root->children;
2597 #ifdef PUGOPT_NONSEG
2598 const int namelen = _tcslen(name);
2599 #endif
2600 for(register unsigned int i=0; i<n; ++i)
2604 _root->child[i]->name &&
2605 #ifdef PUGOPT_NONSEG
2606 matches_child_name(name,namelen,i)
2607 #else
2608 matches_child_name(name,i)
2609 #endif
2611 return i;
2613 return -1;
2616 //Traversal Helpers
2617 public:
2619 //<summary>Find all elements having the given name.</summary>
2620 //<param name="name">Reference to name of child to find.</param>
2621 //<param name="found">Reference to xml_node_list or pointer_array to receive the matching elements.</param>
2622 void all_elements_by_name(const std::string& name,pointer_array& found){ all_elements_by_name(name.c_str(),found); }
2624 //<summary>Find all elements having the given name.</summary>
2625 //<param name="name">Pointer to name of child to find.</param>
2626 //<param name="found">Reference to xml_node_list or pointer_array to receive the matching elements.</param>
2627 void all_elements_by_name(const TCHAR* name,pointer_array& found)
2629 if(empty() || !name) return; //Invalid node, so fail.
2630 if(_root->children > 0) //Has children.
2632 #ifdef PUGOPT_NONSEG
2633 const unsigned int namelen = _tcslen(name);
2634 #endif
2635 register unsigned int n = _root->children; //For each child.
2636 for(register unsigned int i=0; i<n; ++i)
2640 _root->child[i] && //There is a child at i.
2641 _root->child[i]->name && //The child has a name.
2642 #ifdef PUGOPT_NONSEG
2643 matches_child_name(name,namelen,i)
2644 #else
2645 matches_child_name(name,i)
2646 #endif
2648 found.push_back(_root->child[i]); //push_back it to the array.
2649 if(_root->child[i]->children) //If there are children.
2651 xml_node subsearch(_root->child[i]); //Wrap it up for ease.
2652 subsearch.all_elements_by_name(name,found); //Find any matching children.
2658 //<summary>
2659 // Recursively-implemented depth-first find the first matching element.
2660 // Use for shallow drill-downs.
2661 //</summary>
2662 //<param name="name">Const reference to name of element to find.</param>
2663 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2664 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2665 xml_node first_element_by_name(const std::string& name){ return first_element_by_name(name.c_str()); }
2667 //<summary>
2668 // Recursively-implemented depth-first find the first matching element.
2669 // Use for shallow drill-downs.
2670 //</summary>
2671 //<param name="name">Pointer to name of element to find.</param>
2672 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2673 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2674 xml_node first_element_by_name(const TCHAR* name)
2676 if(empty() || !name) return xml_node(); //Invalid node, so fail.
2677 if(_root->children > 0) //Has children.
2679 register unsigned int n = _root->children; //For each child.
2680 #ifdef PUGOPT_NONSEG
2681 const int namelen = _tcslen(name);
2682 #endif
2683 for(register unsigned int i=0; i<n; ++i)
2687 _root->child[i]->name &&
2688 #ifdef PUGOPT_NONSEG
2689 matches_child_name(name,namelen,i)
2690 #else
2691 matches_child_name(name,i)
2692 #endif
2694 return xml_node(_root->child[i]);
2695 else if(_root->child[i]->children)
2697 xml_node subsearch(_root->child[i]); //Wrap it up for ease.
2698 xml_node found = subsearch.first_element_by_name(name);
2699 if(!found.empty()) return found; //Found.
2703 return xml_node(); //Not found.
2706 //<summary>
2707 // Recursively-implemented depth-first find the first matching element
2708 // also having matching PCDATA.
2709 //</summary>
2710 //<param name="name">Reference to name of element to find.</param>
2711 //<param name="value">Reference to PCDATA to find.</param>
2712 //<returns>Valid xml_node if such element named 'name' is found with PCDATA 'value'.</returns>
2713 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2714 xml_node first_element_by_value(const std::string& name,const std::string& value){ return first_element_by_value(name.c_str(),value.c_str()); }
2716 //<summary>
2717 // Recursively-implemented depth-first find the first matching element
2718 // also having matching PCDATA.
2719 //</summary>
2720 //<param name="name">Pointer to name of element to find.</param>
2721 //<param name="value">Pointer to PCDATA to find.</param>
2722 //<returns>Valid xml_node if such element named 'name' is found with PCDATA 'value'.</returns>
2723 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2724 xml_node first_element_by_value(const TCHAR* name,const TCHAR* value)
2726 if(empty() || !name || !value) return xml_node(); //Invalid node, so fail.
2727 if(_root->children > 0) //Has children.
2729 register unsigned int n = _root->children; //For each child.
2730 #ifdef PUGOPT_NONSEG
2731 const unsigned int namelen = _tcslen(name);
2732 const unsigned int valulen = _tcslen(value);
2733 #endif
2734 for(register unsigned int i=0; i<n; ++i)
2738 _root->child[i] && //There is a child at i.
2739 _root->child[i]->name && //The child has a name.
2740 #ifdef PUGOPT_NONSEG
2741 matches_child_name(name,namelen,i)
2742 #else
2743 matches_child_name(name,i)
2744 #endif
2747 register unsigned int m = _root->child[i]->children; //For each child of child.
2748 for(register unsigned int j=0; j<m; ++j)
2752 _root->child[i]->child[j] && //There is a child at j.
2753 _root->child[i]->child[j]->type == node_pcdata && //It is of the PCDATA type.
2754 _root->child[i]->child[j]->value && //It has data.
2755 #ifdef PUGOPT_NONSEG
2756 matches_value(value,valulen,_root->child[i]->child[j])
2757 #else
2758 matches_value(value,_root->child[i]->child[j])
2759 #endif
2761 return xml_node(_root->child[i]); //Wrap it up and return.
2764 else if(_root->child[i] && _root->child[i]->children) //The child has children.
2766 xml_node subsearch(_root->child[i]); //Wrap it up for ease.
2767 xml_node found = subsearch.first_element_by_value(name,value); //Search any children.
2768 if(!found.empty()) return found; //Found.
2772 return xml_node(); //Not found.
2775 //<summary>
2776 // Recursively-implemented depth-first find the first matching element
2777 // also having matching attribute.
2778 //</summary>
2779 //<param name="name">Reference to name of element to find.</param>
2780 //<param name="attr_name">Reference to name of attribute to find.</param>
2781 //<param name="attr_value">Reference to attribute value to find.</param>
2782 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2783 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2784 xml_node first_element_by_attribute(const std::string& name,const std::string& attr_name,const std::string& attr_value){ return first_element_by_attribute(name.c_str(),attr_name.c_str(),attr_value.c_str()); }
2786 //<summary>
2787 // Recursively-implemented depth-first find the first matching element
2788 // also having matching attribute.
2789 //</summary>
2790 //<param name="name">Pointer to name of element to find.</param>
2791 //<param name="attr_name">Pointer to name of attribute to find.</param>
2792 //<param name="attr_value">Pointer to attribute value to find.</param>
2793 //<returns>Valid xml_node if such element named 'name' is found.</returns>
2794 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2795 xml_node first_element_by_attribute(const TCHAR* name,const TCHAR* attr_name,const TCHAR* attr_value)
2797 if(empty() || !name || !attr_name || !attr_value) return xml_node(); //Invalid data, so fail.
2798 if(_root->children > 0) //Has children.
2800 #ifdef PUGOPT_NONSEG
2801 const unsigned int namelen = _tcslen(name);
2802 const unsigned int attrlen = _tcslen(attr_name);
2803 const unsigned int valulen = _tcslen(attr_value);
2804 #endif
2805 register unsigned int n = _root->children; //For each child.
2806 for(register unsigned int i=0; i<n; ++i)
2810 _root->child[i] && //There is a child at i.
2811 _root->child[i]->name && //The child has a name.
2812 #ifdef PUGOPT_NONSEG
2813 matches_name(name,namelen,_root->child[i])
2814 #else
2815 matches_name(name,_root->child[i])
2816 #endif
2819 register unsigned int m = _root->child[i]->attributes; //For each attribute of child.
2820 for(register unsigned int j=0; j<m; ++j)
2824 _root->child[i]->attribute[j] && //There is an attribute at j.
2825 _root->child[i]->attribute[j]->name && //The attribute has a name.
2826 #ifdef PUGOPT_NONSEG
2827 matches_attribute_name(attr_name,attrlen,_root->child[i]->attribute[j]) &&
2828 #else
2829 matches_attribute_name(attr_name,_root->child[i]->attribute[j]) &&
2830 #endif
2831 _root->child[i]->attribute[j]->value && //The attribute has a value.
2832 #ifdef PUGOPT_NONSEG
2833 matches_attribute_name_value(attr_value,valulen,_root->child[i]->attribute[j])
2834 #else
2835 matches_attribute_name_value(attr_value,_root->child[i]->attribute[j])
2836 #endif
2838 return xml_node(_root->child[i]); //Wrap it up and return.
2841 else if(_root->child[i] && _root->child[i]->children)
2843 xml_node subsearch(_root->child[i]); //Wrap it up for ease.
2844 xml_node found = subsearch.first_element_by_attribute(name,attr_name,attr_value); //Search any children.
2845 if(!found.empty()) return found; //Found.
2849 return xml_node(); //Not found.
2852 //<summary>
2853 // Recursively-implemented depth-first find the first matching entity.
2854 // Use for shallow drill-downs.
2855 //</summary>
2856 //<param name="type">Node type to find.</param>
2857 //<returns>Valid xml_node if a node of type 'type' is found.</returns>
2858 //<remarks>xml_node may be invalid if not found; test with 'empty'.</remarks>
2859 xml_node first_node(xml_node_type type)
2861 if(!_root) return xml_node();
2862 if(_root->children > 0) //Has children.
2864 register unsigned int n = _root->children; //For each child.
2865 for(register unsigned int i=0; i<n; ++i)
2867 if(_root->child[i]->type==type)
2868 return xml_node(_root->child[i]);
2869 else if(_root->child[i]->children)
2871 xml_node subsearch(_root->child[i]);
2872 xml_node found = subsearch.first_node(type);
2873 if(!found.empty()) return found; //Found.
2877 return xml_node(); //Not found.
2880 //<summary>Move to the absolute root of the document tree.</summary>
2881 //<returns>True if the current node is valid.</returns>
2882 //<remarks>Member '_root' may now point to absolute root of the document.</remarks>
2883 bool moveto_root()
2885 if(empty()) return false; //Nowhere to go.
2886 while(!type_document()) _root = _root->parent; //Keep stepping out until we hit the root.
2887 return true; //Success.
2890 //<summary>Move to the current node's parent.</summary>
2891 //<returns>true if there is a parent and cursor is not parent, and cursor points thereto.</returns>
2892 //<remarks>'_root' may now point to parent.</remarks>
2893 bool moveto_parent()
2895 if(empty() || type_document()) return false; //Invalid, or at the root (has no parent).
2896 _root = _root->parent; //Move to parent.
2897 return true; //Success.
2900 //<summary>
2901 // Move to the current node's sibling at subscript. Equivalent to
2902 // 'moveto_child' following 'moveto_parent'.
2903 //</summary>
2904 //<param name="i">Subscript of sibling to move cursor to.</param>
2905 //<returns>True if valid subscript, and cursor points thereto.</returns>
2906 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2907 bool moveto_sibling(unsigned int i)
2909 if(empty()) return false; //Nowhere to go.
2910 xml_node_struct* restore = _root; //Save position in case invalid subscript & we want to restore.
2911 if(moveto_parent()) //Try to move to parent.
2913 if(i < children()) //Subscript is in range. (Assume parent *does* have children.)
2915 _root = _root->child[i]; //Move to child at subscript ('sibling').
2916 return true; //Success.
2919 _root = restore; //Bad subscript, or parent move; restore.
2920 return false;
2923 //<summary>Move to the current node's first sibling matching given name.</summary>
2924 //<param name="name">Element name of sibling to move to.</param>
2925 //<returns>True if sibling was found, and cursor points thereto.</returns>
2926 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2927 bool moveto_first_sibling(const std::string& name){ return moveto_first_sibling(name.c_str()); }
2929 //<summary>Move to the current node's first sibling matching given name.</summary>
2930 //<param name="name">Element name of sibling to move to.</param>
2931 //<returns>True if sibling was found, and cursor points thereto.</returns>
2932 //<remarks>If matching co-node was found, '_root' points thereto.</remarks>
2933 bool moveto_first_sibling(const TCHAR* name)
2935 if(empty() || !name) return false; //Nowhere to go, or nothing to find.
2936 xml_node_struct* restore = _root; //Save position in case invalid subscript & we want to restore.
2937 if(moveto_parent()) //Try to move to parent.
2939 #ifdef PUGOPT_NONSEG
2940 const unsigned int namelen = _tcslen(name);
2941 #endif
2942 register unsigned int n = children(); //Search for matching name
2943 for(register unsigned int i=0; i<n; ++i)
2945 //NF 24 Jan 2003 Changed to get child(i) just once per iteration.
2946 xml_node node = child(i); //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
2947 if(node.type_element()||node.type_pi()) //Other types won't have names.
2949 #ifdef PUGOPT_NONSEG
2950 if(_tcsncmp(name,node.name(),max(namelen,node.name_size()))==0) //Do names match?
2951 #else
2952 if(strcmpwild(name,node.name())==0) //Do names match?
2953 #endif
2955 _root = node; //Move there.
2956 return true; //Success.
2961 _root = restore; //Failed to locate any such sibling; restore position.
2962 return false;
2965 //<summary>Move to the current node's child at subscript.</summary>
2966 //<param name="i">Subscript of child to move cursor to.</param>
2967 //<returns>true if valid subscript, and cursor points thereto.</returns>
2968 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2969 bool moveto_child(unsigned int i)
2971 if(empty()) return false; //Null, so no children.
2972 if(has_child_nodes() && i < children()) //Has children and subscript is in bounds.
2974 _root = child(i); //Move to the child at i.
2975 return true; //Success.
2977 return false; //Failure.
2980 //<summary>Move to the current node's child matching given name.</summary>
2981 //<param name="name">Element name of child to move to if found.</param>
2982 //<returns>True if child was found, and cursor points thereto.</returns>
2983 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2984 bool moveto_child(const std::string& name){ return moveto_child(name.c_str()); }
2986 //<summary>Move to the current node's child matching given name.</summary>
2987 //<param name="name">Element name of child to move to if found.</param>
2988 //<returns>True if child was found, and cursor points thereto.</returns>
2989 //<remarks>If matching sub-node was found, '_root' points thereto.</remarks>
2990 bool moveto_child(const TCHAR* name)
2992 if(empty() || !name || !has_child_nodes()) return false; //The node is null, a name was not specified, or node has no children.
2993 #ifdef PUGOPT_NONSEG
2994 const unsigned int namelen = _tcslen(name);
2995 #endif
2996 register unsigned int n = children(); //For each child.
2997 for(register unsigned int i=0; i<n; ++i)
2999 //NF 24 Jan 2003: Changed to get child(i) just once per iteration.
3000 xml_node node = child(i); //Access child node at subscript as xml_node or xml_node(NULL) if bad subscript.
3001 #ifdef PUGOPT_NONSEG
3002 if(_tcsncmp(name,node.name(),max(namelen,node.name_size()))==0) //Do names match?
3003 #else
3004 if(strcmpwild(name,node.name())==0) //If the name is identical with 'name'.
3005 #endif
3007 _root = node; //Move to it.
3008 return true; //Success.
3011 return false; //Failure.
3014 //<summary>Move to the current node's next sibling by position and name.</summary>
3015 //<param name="name">Name of sibling to move to if found.</param>
3016 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3017 bool moveto_next_sibling(const std::string& name){ return moveto_next_sibling(name.c_str()); }
3019 //<summary>Move to the current node's next sibling by position and name.</summary>
3020 //<param name="name">Name of sibling to move to if found.</param>
3021 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3022 bool moveto_next_sibling(const TCHAR* name)
3024 if(empty() || type_document() || !_root->parent || !name) return false; //Null, or at root, or no name, so there are no valid matches.
3025 #ifdef PUGOPT_NONSEG
3026 const unsigned int namelen = _tcslen(name);
3027 #endif
3028 register unsigned int n = _root->parent->children; //For each child of parent.
3029 for(register unsigned int i=0; i<(n-1); ++i)
3033 _root->parent->child[i] && //There is a child at i.
3034 _root->parent->child[i] == _root && //The child is identical with this node.
3035 i < (n-1) //This is not the last child.
3038 for(++i; i<n; ++i) //For each following child.
3042 _root->parent->child[i] && //There is a child at i.
3043 _root->parent->child[i]->name && //The child's name is not null.
3044 #ifdef PUGOPT_NONSEG
3045 matches_name(name,namelen,_root->parent->child[i])
3046 #else
3047 matches_name(name,_root->parent->child[i])
3048 #endif
3051 moveto_sibling(i); //Move to it.
3052 return true; //Success.
3057 return false; //Failure.
3060 //<summary>Move to the current node's next sibling by position.</summary>
3061 //<returns>True if there is a next sibling, and cursor points thereto.</returns>
3062 bool moveto_next_sibling()
3064 if(empty() || type_document() || !_root->parent) return false; //Null or at root, so there are no valid siblings.
3065 register unsigned int n = _root->parent->children; //For each child of parent (each sibling).
3066 for(register unsigned int i=0; i<(n-1); ++i)
3070 _root->parent->child[i] && //There is a child at i.
3071 _root->parent->child[i] == _root && //The child is identical with this node.
3072 i < (n-1) //This is not the last child.
3075 for(++i; i<n; ++i) //For each following child.
3077 if(_root->parent->child[i]) //There is a child at i.
3079 moveto_sibling(i); //Move to it.
3080 return true; //Success.
3085 return false; //Failure.
3088 //<summary>Compile the absolute node path from root as a text string.</summary>
3089 //<param name="delimiter">Delimiter string to insert between element names.</param>
3090 //<returns>Path string (e.g. with '/' as delimiter, '/document/.../this').</returns>
3091 std::string path(const TCHAR* delimiter = _T("/"))
3093 TCHAR* path = NULL; //Current path.
3094 TCHAR* temp; //Temporary pointer.
3095 xml_node cursor = *this; //Make a copy.
3096 #ifdef PUGOPT_NONSEG
3097 unsigned int destlen = 0;
3098 strcatgrown_impl(&path,cursor.name(),destlen,cursor.name_size()); //Get this name.
3099 #else
3100 strcatgrow(&path,cursor.name()); //Get this name.
3101 #endif
3102 while(cursor.moveto_parent() && !cursor.type_document()) //Loop to parent (stopping on actual root because it has no name).
3104 temp = NULL; //Mark as null so 'strcatgrow' will allocate memory.
3105 #ifdef PUGOPT_NONSEG
3106 destlen = 0;
3107 strcatgrown_impl(&temp,cursor.name(),destlen,cursor.name_size()); //Append next element name.
3108 #else
3109 strcatgrow(&temp,cursor.name()); //Append next element name.
3110 #endif
3111 strcatgrow(&temp,delimiter); //Append delimiter.
3112 strcatgrow(&temp,path); //Append current path.
3113 free(path); //Free the old path.
3114 path = temp; //Set path as new string.
3116 temp = NULL;
3117 strcatgrow(&temp,delimiter); //Prepend final delimiter.
3118 strcatgrow(&temp,path); //Append current path.
3119 free(path); //Free the old path.
3120 std::string returns = temp; //Set path as new string.
3121 free(temp);
3122 return returns; //Return the path;
3125 //<summary>Search for a node by path.</summary>
3126 //<param name="path">
3127 // Path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative
3128 // to root), '../foo/bar' (pop relative position).
3129 //</param>
3130 //<param name="delimiter">Delimiter string to use in tokenizing path.</param>
3131 //<returns>Matching node, or xml_node(NULL) if not found.</returns>
3132 xml_node first_element_by_path(const std::string& path,const std::string& delimiter = _T("/")){ return first_element_by_path(path.c_str(),delimiter.c_str()); }
3134 //<summary>Search for a node by path.</summary>
3135 //<param name="path">
3136 // Path string; e.g. './foo/bar' (relative to node), '/foo/bar' (relative
3137 // to root), '../foo/bar' (pop relative to position).
3138 //</param>
3139 //<param name="delimiter">Delimiter string to use in tokenizing path.</param>
3140 //<returns>Matching node, or xml_node(NULL) if not found.</returns>
3141 //<remarks>To-do: Support XPath-style queries.</remarks>
3142 xml_node first_element_by_path(const TCHAR* path,const TCHAR* delimiter = _T("/"))
3144 if(!path) return xml_node();
3145 TCHAR* temp = NULL;
3146 pointer_array path_segments; //Array of path segments.
3147 xml_node found = *this; //Current search context.
3148 strcatgrow(&temp,path);
3149 TCHAR* name = _tcstok(temp,delimiter);
3150 while(name) //Tokenize the whole path.
3152 path_segments.push_back((void*)name); //push_back it to array.
3153 name = _tcstok(NULL,delimiter); //Get the next token,
3155 register unsigned int n = path_segments.size();
3156 if(n == 0) return xml_node(); //Return null node if no path segments.
3157 if(path[0]==delimiter[0]) found.moveto_root(); //Absolute path; e.g. '/foo/bar'
3158 for(register unsigned int i = 0; i<n; ++i) //For each path segment.
3160 name = (TCHAR*)path_segments.at(i);
3161 if(name)
3163 if(*name==_T('.')) //Is '.' or '..'
3165 if(_tcscmp(name,_T(".."))==0) found.moveto_parent(); //Pop.
3166 else continue; //Ignore '.' since it is redundant if path is './path'.
3168 else
3170 register unsigned int j, m = found.children(); //For each child.
3171 for(j=0; j<m; ++j)
3173 if(found.child(j).has_name(name)) //Name matches?
3175 found = found.child(j); //Move to this child.
3176 goto NEXT_ELEM; //Search next path segment.
3179 if(found.moveto_next_sibling(found.name())) //Find next sibling having same name.
3181 if(i > 0) --i; //Try the previous path segment.
3182 goto NEXT_ELEM;
3184 else //Move to parent to search further.
3186 if(!found.type_document() && found.moveto_parent() && !found.type_document()) //Not root and stepped to parent and parent is not root.
3188 if(i > 0) --i; //Try the previous path segment.
3189 if(found.moveto_next_sibling(found.name())) //Try to find next sibling having same name.
3191 if(i > 0) --i; //Try the previous path segment.
3192 goto NEXT_ELEM;
3198 NEXT_ELEM:;
3199 if(found.type_document()) //Can't move up any higher, so fail.
3201 free(temp); //Got to free this.
3202 return xml_node(); //Return null node.
3205 free(temp); //Got to free this.
3206 return found; //Return the matching node.
3209 //<summary>Recursively traverse the tree.</summary>
3210 //<param name="walker">Reference to tree walker derived from xml_tree_walker.</param>
3211 //<returns>True if traversal was not halted by xml_tree_walker::for_each() callback.</returns>
3212 bool traverse(xml_tree_walker& walker)
3214 if(walker.depth() == 0 && !walker.begin(*this)) return false; //Send the callback for begin traverse if depth is zero.
3215 if(!empty()) //Don't traveres if this is a null node.
3217 walker.push(); //Increment the walker depth counter.
3218 register unsigned int n = _root->children; //For each child.
3219 for(register unsigned int i=0; i<n; ++i)
3221 if(_root->child[i]) //There is a child at i.
3223 xml_node subsearch(_root->child[i]); //Wrap it.
3224 if(!(walker.for_each(subsearch) && subsearch.traverse(walker)))
3225 return false; //Traversal was aborted.
3228 walker.pop(); //Decrement the walker depth counter.
3230 if(walker.depth() == 0 && !walker.end(*this)) return false; //Send the callback for end traverse if depth is zero.
3231 return true;
3234 //Editorial Helpers
3235 public:
3237 //<summary>Set element name.</summary>
3238 //<param name="new_name">New element name.</param>
3239 //<returns>Success.</returns>
3240 bool name(const std::string& new_name){ return name(new_name.c_str()); }
3242 //<summary>Set element name.</summary>
3243 //<param name="new_name">New element name.</param>
3244 //<returns>Success.</returns>
3245 bool name(const TCHAR* new_name)
3247 if((type_element() || type_pi()) && new_name)
3248 #ifdef PUGOPT_NONSEG
3249 return strcpyinsitu(&_root->name,new_name,&_root->name_insitu,_root->name_size );
3250 #else
3251 return strcpyinsitu(&_root->name,new_name,&_root->name_insitu);
3252 #endif
3253 return false;
3256 //<summary>Set node data.</summary>
3257 //<param name="new_value">New data (PCDATA, CDATA, or comment) value.</param>
3258 //<returns>Success.</returns>
3259 bool value(const std::string& new_value){ return value(new_value.c_str()); }
3261 //<summary>Set node data.</summary>
3262 //<param name="new_value">New data (PCDATA, CDATA, or comment) value.</param>
3263 //<returns>Success.</returns>
3264 bool value(const TCHAR* new_value)
3266 if((type_pcdata() || type_cdata() || type_comment()) && new_value)
3267 #ifdef PUGOPT_NONSEG
3268 return strcpyinsitu(&_root->value,new_value,&_root->value_insitu,_root->value_size);
3269 #else
3270 return strcpyinsitu(&_root->value,new_value,&_root->value_insitu);
3271 #endif
3272 return false;
3275 //<summary>Remove attribute at the given subscript.</summary>
3276 //<param name="i">Subscript.</param>
3277 //<returns>Success.</returns>
3278 bool remove_attribute(unsigned int i)
3280 unsigned int n = _root->attributes;
3281 if(i < n)
3283 xml_attribute_struct* temp = _root->attribute[i];
3284 --n;
3285 for(unsigned int j=i; j<n; ++j)
3286 _root->attribute[j] = _root->attribute[j+1];
3287 _root->attribute[n] = NULL;
3288 if(!temp->name_insitu) free(temp->name);
3289 if(!temp->value_insitu) free(temp->value);
3290 free(temp);
3291 --_root->attributes;
3292 return true;
3294 return false;
3297 //<summary>Remove attribute having the given name.</summary>
3298 //<param name="name">Name of attribute to delete.</param>
3299 //<returns>Success.</returns>
3300 bool remove_attribute(const std::string& name){ return remove_attribute(name.c_str()); }
3302 //<summary>Remove attribute having the given name.</summary>
3303 //<param name="name">Name of attribute to delete.</param>
3304 //<returns>Success.</returns>
3305 bool remove_attribute(const TCHAR* name)
3307 int i = mapto_attribute_idx(name);
3308 if(i > -1) return remove_attribute((unsigned int)i);
3309 return false;
3312 //<summary>Append a new attribute to the node list of attributes.</summary>
3313 //<param name="name">Name.</param>
3314 //<param name="value">Value thereof.</param>
3315 //<returns>Attribute structure wrapper.</returns>
3316 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3317 xml_attribute append_attribute(const std::string& name,const std::string& value){ return append_attribute(name.c_str(),value.c_str()); }
3319 //<summary>Append a new attribute to the node list of attributes.</summary>
3320 //<param name="name">Name.</param>
3321 //<param name="value">Value thereof.</param>
3322 //<returns>Attribute structure wrapper.</returns>
3323 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3324 xml_attribute append_attribute(const TCHAR* name,const TCHAR* value)
3326 if(!name || !value) return xml_attribute(); //We must have both to proceed.
3327 xml_attribute_struct* p = pug::append_attribute(_root,1); //Append/allocate a new attribute structure.
3328 if(p) //If append/allocate succeeded.
3330 #ifdef PUGOPT_NONSEG
3331 strcatgrown(&p->name,name,p->name_size); //Append the name.
3332 strcatgrown(&p->value,value,p->value_size); //Append the name.
3333 #else
3334 strcatgrow(&p->name,name); //Append the name.
3335 strcatgrow(&p->value,value); //Append the name.
3336 #endif
3337 p->name_insitu = p->value_insitu = false; //Mark as not part of original parse string.
3338 return xml_attribute(p); //Success.
3340 return xml_attribute(); //Failure; return an empty.
3343 //<summary>Append a new attribute of type long to the node list of attributes.</summary>
3344 //<param name="name">Name.</param>
3345 //<param name="value">Value thereof.</param>
3346 //<returns>Attribute structure wrapper.</returns>
3347 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3348 xml_attribute append_attribute(const TCHAR* name,long value)
3350 if(!name) return false;
3351 TCHAR temp[32] = {0};
3352 _stprintf(temp,_T("%ld"),value);
3353 return append_attribute(name,temp);
3356 //<summary>Append a new attribute of type double to the node list of attributes.</summary>
3357 //<param name="name">Name.</param>
3358 //<param name="value">Value thereof.</param>
3359 //<returns>Attribute structure wrapper.</returns>
3360 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3361 xml_attribute append_attribute(const TCHAR* name,double value)
3363 if(!name) return false;
3364 TCHAR temp[32] = {0};
3365 _stprintf(temp,_T("%lf"),value);
3366 return append_attribute(name,temp);
3369 //<summary>Append a new attribute of type bool to the node list of attributes.</summary>
3370 //<param name="name">Name.</param>
3371 //<param name="value">Value thereof.</param>
3372 //<returns>Attribute structure wrapper.</returns>
3373 //<remarks>Pointer space may be grown, memory for name/value members allocated.</remarks>
3374 xml_attribute append_attribute(const TCHAR* name,bool value)
3376 if(!name) return false;
3377 return append_attribute(name,((value)?_T("true"):_T("false")));
3380 //<summary>Set the current node entity type.</summary>
3381 //<param name="new_type">New type to set.</param>
3382 //<returns>Previous type.</returns>
3383 //<remarks>If has children and now is not node_element, children are obscured.</remarks>
3384 xml_node_type type(xml_node_type new_type)
3386 xml_node_type prev = _root->type; //Save old type.
3387 _root->type = new_type; //Set new type.
3388 return prev; //Return old type.
3391 //<summary>
3392 // Allocate & append a child node of the given type at the end of the
3393 // current node array of children.
3394 //</summary>
3395 //<param name="type">New child node type.</param>
3396 //<returns>xml_node wrapping the new child.</returns>
3397 //<remarks>Pointer space may be grown. An xml_node_struct structure is allocated.</remarks>
3398 xml_node append_child(xml_node_type type)
3400 if(type_document()||type_element()) //Don't do anything if not an node_element or root.
3402 xml_node_struct* p = pug::append_node(_root,1,type); //Append the node.
3403 if(p)
3405 p->name_insitu = p->value_insitu = false;
3406 return xml_node(p); //If we have it, return wrapped.
3409 return xml_node(); //Return dummy.
3412 //<summary>Allocate & insert a child node of the given type at subscript.</summary>
3413 //<param name="i">Subscript at which to insert.</param>
3414 //<param name="type">New child node type.</param>
3415 //<returns>xml_node wrapping the new child.</returns>
3416 //<remarks>
3417 // Pointer space may be grown. An xml_node_struct structure is allocated,
3418 // and existing children are shifted in their array position.
3419 //</remarks>
3420 xml_node insert_child(unsigned int i,xml_node_type type)
3422 if(!type_element()) return xml_node(); //Don't do anything if not an node_element.
3423 unsigned int n = _root->children; //Get count of existing children.
3424 if(type_element() && i >= n) return append_child(type); //If subscript at end of array then just append.
3425 else if(type_element() && i < n)
3427 xml_node_struct* p = pug::append_node(_root,1,type); //Append the new node (by default at last array position).
3428 if(p) //Ensure we have it.
3430 register int m = (i-1); //Stop at i.
3431 for(register int j=(n-1); j>m; --j) //Starting at one less than end of array, reverse loop to i.
3432 _root->child[j+1] = _root->child[j]; //Shift node to right.
3433 _root->child[i] = p; //Set node at subscript to new node.
3434 return xml_node(p); //Return new node.
3437 return xml_node(); //Return dummy.
3440 //<summary>Delete the child node at the given subscript.</summary>
3441 //<param name="i">Subscript.</param>
3442 //<returns>Success.</returns>
3443 //<remarks>Shifts child array element positions. Frees entire tree under child to be deleted.</remarks>
3444 bool remove_child(unsigned int i)
3446 unsigned int n = _root->children;
3447 if(i < n) //Ensure subscript is in bounds.
3449 xml_node_struct* p = _root->child[i]; //Keep a pointer to this node so we can free it.
3450 --n;
3451 unsigned int j;
3452 for(j=i; j<n; ++j) //Shift everything left from this point on.
3453 _root->child[j] = _root->child[j+1];
3454 _root->child[j] = NULL; //Mark the last element null.
3455 --_root->children; //One less children.
3456 p->parent = p; //This ensures we only free this node when calling 'free_node'.
3457 pug::free_node(p); //Free the node tree.
3458 return true; //Success.
3460 return false; //Failure.
3463 //Stream/Output Helpers
3464 public:
3466 //<summary>
3467 // Stream output. Recursively writes the internal xml_node_struct structure
3468 // to the given stream.
3469 //</summary>
3470 //<param name="os">Reference to output stream.</param>
3471 //<param name="indent_char">Char to use for indent.</param>
3472 //<param name="breaks">Use linebreaks?</param>
3473 //<remarks>String data is written to stream.</remarks>
3474 void outer_xml(std::basic_ostream<TCHAR,std::char_traits<TCHAR> >& os,TCHAR indent_char = _T('\t'),bool breaks = true)
3476 if(empty()) return; //Make sure there is something to output.
3477 indent_stack indent(indent_char); //Prepare the indent.
3478 if(type_document()) //If this is the root, we don't want to output the root itself.
3480 register unsigned int n = _root->children; //Output each child of the root.
3481 for(register unsigned int i=0; i<n; ++i)
3482 pug::outer_xml(os,indent,_root->child[i],breaks);
3484 else pug::outer_xml(os,indent,_root,breaks); //Output the node.
3487 //<summary>
3488 // Stream output operator. Wraps 'outer_xml'. Recursively writes
3489 // the given node to the given stream.
3490 //</summary>
3491 //<param name="os">Reference to output stream.</param>
3492 //<param name="xml_node">Reference to tree node.</param>
3493 //<returns>Reference to output stream.</returns>
3494 //<remarks>String data is written to stream.</remarks>
3495 friend std::basic_ostream<TCHAR,std::char_traits<TCHAR> >& operator<<(std::basic_ostream<TCHAR,std::char_traits<TCHAR> >& os,xml_node node)
3497 if(!os.good()) return os;
3498 if((os.flags()|std::ostream::skipws) == std::ostream::skipws)
3499 node.outer_xml(os,0,false); //Skipping whitespace; suppress indents & linebreaks.
3500 else node.outer_xml(os); //Default options.
3501 return os;
3506 //<summary>Provides a high-level interface to the XML parser.</summary>
3507 class xml_parser
3509 //Internal Data Members
3510 protected:
3512 xml_node_struct* _xmldoc; //Pointer to current XML document tree root.
3513 long _growby; //Attribute & child pointer space growth increment.
3514 bool _autdel; //Delete the tree on destruct?
3515 TCHAR* _buffer; //Pointer to in-memory buffer (for 'parse_file').
3516 TCHAR* _strpos; //Where parsing left off (for 'parse_file').
3517 unsigned long _optmsk; //Parser options.
3518 #ifdef PUGOPT_MEMFIL
3519 HANDLE _mmfile; //File handle.
3520 HANDLE _mmfmap; //Handle which maps the file.
3521 void* _mmaddr; //Base address of map.
3522 size_t _mfsize; //Size of memory-mapped file.
3523 bool _addeos; //True if we had to add a 0 to then end of the file.
3524 #endif
3526 //Construction/Destruction
3527 public:
3529 //<summary>Constructor.</summary>
3530 //<param name="optmsk">Options mask.</param>
3531 //<param name="autdel">Delete tree on destruct?</param>
3532 //<param name="growby">Parser pointer space growth increment.</param>
3533 //<remarks>Root node structure is allocated.</remarks>
3534 xml_parser(unsigned long optmsk = parse_default,bool autdel = true,long growby = parse_grow):
3535 _xmldoc(0),
3536 _growby(growby),
3537 _autdel(autdel),
3538 _optmsk(optmsk),
3539 _buffer(0),
3540 _strpos(0)
3541 #ifdef PUGOPT_MEMFIL
3543 _mmfile(0),
3544 _mmfmap(0),
3545 _mmaddr(0),
3546 _mfsize(0),
3547 _addeos(false)
3548 #endif
3552 //<summary>Direct parse constructor.</summary>
3553 //<param name="xmlstr">
3554 // XML-formatted string to parse. Note: String must persist for the
3555 // life of the tree. String is zero-segmented, but not freed.
3556 //</param>
3557 //<param name="optmsk">Parser options.</param>
3558 //<param name="autdel">Delete tree on destruct?</param>
3559 //<param name="growby">Parser pointer space growth increment.</param>
3560 //<remarks>Root node structure is allocated, string is parsed & tree may be grown.</remarks>
3561 xml_parser(TCHAR* xmlstr,unsigned long optmsk = parse_default,bool autdel = true,long growby = parse_grow) :
3562 _xmldoc(0),
3563 _growby(growby),
3564 _autdel(autdel),
3565 _optmsk(optmsk),
3566 _buffer(0),
3567 _strpos(0)
3568 #ifdef PUGOPT_MEMFIL
3570 _mmfile(0),
3571 _mmfmap(0),
3572 _mmaddr(0),
3573 _mfsize(0),
3574 _addeos(false)
3575 #endif
3577 parse(xmlstr,_optmsk); //Parse it.
3580 //<summary>Destructor.</summary>
3581 //<remarks>Tree memory and string memory may be freed.</remarks>
3582 virtual ~xml_parser()
3584 if(_autdel && _xmldoc) free_node(_xmldoc);
3585 if(_buffer) free(_buffer);
3586 #ifdef PUGOPT_MEMFIL
3587 close_memfile();
3588 #endif
3591 //Accessors/Operators
3592 public:
3594 operator xml_node_struct*() { return _xmldoc; } //Cast as xml_node_struct pointer to root.
3595 operator xml_node() { return xml_node(_xmldoc); } //Cast as xml_node (same as document).
3596 xml_node document(){ return xml_node(_xmldoc); } //Returns the root wrapped by an xml_node.
3598 //Miscellaneous
3599 public:
3601 //<summary>Allocate a new, empty root.</summary>
3602 //<remarks>Tree memory and string memory may be freed.</remarks>
3603 void create()
3605 clear(); //Free any allocated memory.
3606 _xmldoc = new_node(node_document); //Allocate a new root.
3607 _xmldoc->parent = _xmldoc; //Point to self.
3610 //<summary>Clear any existing tree or string.</summary>
3611 //<remarks>Tree memory and string memory may be freed.</remarks>
3612 void clear()
3614 if(_xmldoc){ free_node(_xmldoc); _xmldoc = 0; }
3615 if(_buffer){ free(_buffer); _buffer = 0; }
3616 #ifdef PUGOPT_MEMFIL
3617 close_memfile();
3618 #endif
3621 #ifdef PUGOPT_MEMFIL
3623 //Memory-Mapped File Support
3624 protected:
3626 //<summary>Closes any existing memory-mapped file.</summary>
3627 void close_memfile()
3629 if(_mmaddr != 0)
3631 UnmapViewOfFile(_mmaddr);
3632 _mmaddr = 0;
3634 if(_mmfmap != 0)
3636 CloseHandle(_mmfmap);
3637 _mmfmap = 0;
3639 if(_mmfile != 0)
3641 if(_addeos) //Remove the 0 we added to the end of the file.
3643 SetFilePointer(_mmfile,_mfsize,NULL,FILE_BEGIN);
3644 SetEndOfFile(_mmfile);
3645 _addeos = false;
3647 CloseHandle(_mmfile);
3648 _mmfile = 0;
3650 _mfsize = 0;
3653 public:
3655 #endif
3657 //<summary>Attach an externally-generated root to the parser.</summary>
3658 //<param name="root">Pointer to node structure.</param>
3659 //<returns>Pointer to old root if any.</returns>
3660 //<remarks>New root may be deleted on dtor if autodelete set.</remarks>
3661 xml_node_struct* attach(xml_node_struct* root)
3663 xml_node_struct* t = _xmldoc; //Save this root.
3664 _xmldoc = root; //Assign.
3665 _xmldoc->parent = _xmldoc; //Ensure we are the root.
3666 return t; //Return the old root if any.
3669 //<summary>Detach the current root from the parser.</summary>
3670 //<returns>Pointer to old root, if any.</returns>
3671 xml_node_struct* detach()
3673 xml_node_struct* t = _xmldoc; //Save this root.
3674 _xmldoc = 0; //So we don't delete later on if autodelete set.
3675 return t; //Return the old root if any.
3678 //<summary>Get parser optsions mask.</summary>
3679 //<returns>Options mask.</returns>
3680 unsigned long options(){ return _optmsk; }
3682 //<summary>Set parser options mask.</summary>
3683 //<param name="optmsk">Options mask to set.</param>
3684 //<returns>Old options mask.</returns>
3685 unsigned long options(unsigned long optmsk)
3687 unsigned long prev = _optmsk;
3688 _optmsk = optmsk;
3689 return prev;
3692 //<summary>Get pointer space growth size increment.</summary>
3693 //<returns>Grow size.</returns>
3694 unsigned long growby(){ return _growby; }
3696 //<summary>Set pointer space growth size increment.</summary>
3697 //<param name="grow">Grow size to set.</param>
3698 //<returns>Old size.</returns>
3699 unsigned long growby(long grow)
3701 long prev = _growby;
3702 _growby = grow;
3703 return prev;
3706 //<summary>Get parse file buffer last string position.</summary>
3707 //<returns>Last string position.</returns>
3708 //<remarks>
3709 // Use after parse_file, with parse_dtd_only set in order to recommence
3710 // parse of document body.
3711 //</remarks>
3712 TCHAR* strpos()
3714 return _strpos;
3717 //Parsing Helpers
3718 public:
3720 //<summary>Parse the given XML string in-situ.</summary>
3721 //<param name="s">Pointer to XML-formatted string.</param>
3722 //<param name="optmsk">Parser options mask.</param>
3723 //<returns>Last string position or null.</returns>
3724 //<remarks>Input string is zero-segmented.</remarks>
3725 TCHAR* parse(TCHAR* s,unsigned long optmsk = parse_noset)
3727 if(!s) return s;
3728 clear(); //Free any allocated memory.
3729 _xmldoc = new_node(node_document); //Allocate a new root.
3730 _xmldoc->parent = _xmldoc; //Point to self.
3731 if(optmsk != parse_noset) _optmsk = optmsk;
3732 return pug::parse(s,_xmldoc,_growby,_optmsk); //Parse the input string.
3736 //<summary>Load into memory and parse the contents of the file at the given path.</summary>
3737 //<param name="path">File path.</param>
3738 //<param name="optmsk">Parser options.</param>
3739 //<returns>Success if the file was loaded.</returns>
3740 //<remarks>
3741 // The file contents is loaded and stored in the member '_buffer' until
3742 // freed by calling 'Parse', 'parse_file', 'clear' or '~xml_parser'.
3743 //</remarks>
3744 bool parse_file(const TCHAR* path,unsigned long optmsk = parse_noset)
3746 if(!path) return false;
3747 clear(); //clear any existing data.
3748 unsigned long bytes;
3749 if(optmsk != parse_noset) _optmsk = optmsk;
3750 if(load_file(path,&_buffer,&bytes) && bytes > 0)
3752 _xmldoc = pug::new_node(node_document);
3753 _xmldoc->parent = _xmldoc; //Point to self.
3754 TCHAR* s = pug::parse(_buffer,_xmldoc,_growby,_optmsk);
3755 _strpos = s;
3756 return true;
3758 return false;
3762 #ifdef PUGOPT_MEMFIL
3764 //<summary>Parse the contents of the file at the given path, using a memory-mapped file.</summary>
3765 //<param name="path">File path.</param>
3766 //<param name="optmsk">Parser options.</param>
3767 //<returns>
3768 // True (1) if the file was parsed successfully, false (0) if open failed,
3769 // and -1 if an exception occured.
3770 //</returns>
3771 //<remarks>
3772 // The file contents are available until closed by calling 'parse',
3773 // 'parse_file', 'clear' or '~xml_parser'.
3774 //</remarks>
3775 int parse_mmfile(const TCHAR* path,unsigned long optmsk = parse_noset)
3777 int status = 0;
3778 if(path)
3780 clear(); //Clear any existing data.
3781 if(optmsk != parse_noset) _optmsk = optmsk;
3782 assert((optmsk & parse_wnorm) == 0); //Normalization isn't implemented for memory-mapped files, as of 23 Jan 2003.
3783 const bool readonly = (optmsk & (parse_dtd|parse_dtd_only)) == 0;
3784 if(open_mmfile(path,readonly,false))
3786 //If the file has a 0 at the end we are ok to proceed, otherwise add one.
3790 *(((TCHAR*)_mmaddr) + _mfsize) == 0
3793 _mfsize > 0 &&
3794 *(((TCHAR*)_mmaddr) + _mfsize - 1) == 0
3798 open_mmfile(path,false,true) //Re-open and add 0 at EOF.
3803 _xmldoc = new_node(node_document);
3804 _xmldoc->parent = _xmldoc; //Point to self.
3805 TCHAR* s = pug::parse((TCHAR*)_mmaddr,_xmldoc,_growby,_optmsk);
3806 _strpos = s;
3807 status = 1;
3809 catch(...)
3811 status = -1;
3812 assert(false);
3817 return status;
3820 protected:
3822 //<summary>Opens the specified memory-mapped file.</summary>
3823 //<param name="path">File path.</param>
3824 //<param name="readonly">True to open the file for read-only access.</param>
3825 //<param name="addeos">True to add a 0 to the end of the file.</param>
3826 //<returns>Success if the file was opened.</returns>
3827 bool open_mmfile(const TCHAR* path,const bool readonly,const bool addeos)
3829 clear(); //Close any existing MMF and clear any existing data.
3830 assert(_mmfile == NULL && _mmfile == NULL && _mmaddr == NULL);
3831 _addeos = false;
3832 _mmfile = CreateFile(path,readonly?GENERIC_READ:GENERIC_READ|GENERIC_WRITE,0,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL); //Open read-only, no share, no security attrs, ..., no template.
3833 if(_mmfile != INVALID_HANDLE_VALUE)
3835 _mfsize = ::GetFileSize(_mmfile,NULL);
3836 _mmfmap = CreateFileMapping(_mmfile,NULL,readonly?PAGE_READONLY:PAGE_READWRITE,0,_mfsize+(addeos?sizeof(TCHAR):0),NULL); //Create map: handle, no security attr, read|read/write, larger if addeos, anonymous.
3837 if(_mmfmap != NULL)
3839 assert(_mmaddr == NULL);
3840 _mmaddr = MapViewOfFile(_mmfmap,readonly?FILE_MAP_READ:FILE_MAP_WRITE,0,0,0); //Map the view: handle, read|read/write, start at beginning, map entire file.
3841 if(_mmaddr != NULL)
3843 if(addeos) //Add a terminating 0 to the end of the file for 'parse()'.
3845 assert(!readonly);
3846 *(((TCHAR*)_mmaddr) + _mfsize) = 0;
3847 _addeos = true;
3850 else
3852 CloseHandle(_mmfmap);
3853 CloseHandle(_mmfile);
3854 _mmfile = _mmfmap = 0;
3857 else
3859 CloseHandle(_mmfile);
3860 _mmfile = 0;
3863 return (_mmaddr != NULL);
3866 #endif
3871 //<summary>An array of nodes, used by xml_node queries.</summary>
3872 class xml_node_list: public pointer_array
3874 public:
3875 xml_node_list(unsigned int grow = 4) : pointer_array(grow) { }
3876 virtual ~xml_node_list(){ }
3877 public:
3878 xml_node at(long i){ return xml_node((xml_node_struct*)pointer_array::at((unsigned int)i)); } //Access xml_node at subscript.
3879 xml_node operator[](long i){ return xml_node((xml_node_struct*)pointer_array::at((unsigned int)i)); } //Access xml_node at subscript.
3880 friend std::ostream& operator<<(std::ostream& os,xml_node_list& list) //Output helper.
3882 if(!os.good()) return os;
3883 unsigned int n = list.size();
3884 for(unsigned int i=0; i<n; ++i) os << list[i];
3885 return os;
3890 } } } }
3892 // Undefine these horrible macros
3893 #undef PUGOPT_MEMFIL
3894 #undef PUGOPT_NONSEG
3895 #undef PUGAPI_INTERNAL_VARIANT
3896 #undef PUGAPI_INTERNAL_VERSION_MAJOR
3897 #undef PUGAPI_INTERNAL_VERSION_MINOR
3898 #undef PUGAPI_INTERNAL_VERSION
3899 #undef PUGDEF_ATTR_NAME_SIZE
3900 #undef PUGDEF_ATTR_VALU_SIZE
3901 #undef PUGDEF_ELEM_NAME_SIZE
3902 #undef SKIPWS
3903 #undef OPTSET
3904 #undef PUSHNODE
3905 #undef POPNODE
3906 #undef SCANFOR
3907 #undef SCANWHILE
3909 #ifdef UNDEF_LOHIWORD
3910 #undef HIWORD
3911 #undef LOWORD
3912 #undef UNDEF_LOHIWORD
3913 #endif
3915 #ifdef UNDEF_TCHAR_AND_REST
3916 #undef TCHAR
3917 #undef _tcslen
3918 #undef _istalnum
3919 #undef _tcsncpy
3920 #undef _tcscpy
3921 #undef _tcscmp
3922 #undef _tcstol
3923 #undef _tcstod
3924 #undef _tcstok
3925 #undef _stprintf
3926 #undef _T
3927 #undef UNDEF_TCHAR_AND_REST
3928 #endif
3930 #endif