fix doc example typo
[boost.git] / boost / property_tree / detail / xml_parser_read_spirit.hpp
blobd4eaea5b6cc0f207a634d8b6eb4026b7b6d45902
1 // ----------------------------------------------------------------------------
2 // Copyright (C) 2002-2006 Marcin Kalicinski
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Based on XML grammar by Daniel C. Nuffer
9 // http://spirit.sourceforge.net/repository/applications/xml.zip
10 //
11 // For more information, see www.boost.org
12 // ----------------------------------------------------------------------------
13 #ifndef BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED
14 #define BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED
16 //#define BOOST_SPIRIT_DEBUG
18 #include <boost/property_tree/ptree.hpp>
19 #include <boost/property_tree/detail/xml_parser_error.hpp>
20 #include <boost/property_tree/detail/xml_parser_flags.hpp>
21 #include <boost/property_tree/detail/xml_parser_utils.hpp>
22 #include <boost/spirit.hpp>
23 #include <boost/spirit/iterator/position_iterator.hpp>
24 #include <string>
25 #include <locale>
26 #include <istream>
27 #include <vector>
29 namespace boost { namespace property_tree { namespace xml_parser
32 // XML parser context
33 template<class Ptree>
34 struct context
37 typedef typename Ptree::key_type::value_type Ch;
38 typedef std::basic_string<Ch> Str;
39 typedef typename Ptree::path_type Path;
40 typedef boost::spirit::position_iterator<typename std::vector<Ch>::const_iterator> It;
42 int flags;
43 std::vector<Ptree *> stack;
45 ///////////////////////////////////////////////////////////////////////
46 // Actions
48 struct a_key_s
50 context &c;
51 a_key_s(context &c): c(c) { }
52 void operator()(It b, It e) const
54 if (c.stack.empty())
55 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
56 detail::narrow(b.get_position().file.c_str()),
57 b.get_position().line));
58 Str name(b, e);
59 Ptree *child = &c.stack.back()->push_back(std::make_pair(name, Ptree()))->second;
60 c.stack.push_back(child);
64 struct a_key_e
66 context &c;
67 a_key_e(context &c): c(c) { }
68 void operator()(It b, It e) const
70 if (c.stack.size() <= 1)
71 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
72 detail::narrow(b.get_position().file.c_str()),
73 b.get_position().line));
74 c.stack.pop_back();
78 struct a_content
80 context &c;
81 a_content(context &c): c(c) { }
82 void operator()(It b, It e) const
84 Str s = decode_char_entities(detail::trim(condense(Str(b, e))));
85 if (!s.empty())
87 if (c.flags & no_concat_text)
88 c.stack.back()->push_back(std::make_pair(xmltext<Ch>(), Ptree(s)));
89 else
90 c.stack.back()->put_value(c.stack.back()->template get_value<std::basic_string<Ch> >() + s);
95 struct a_attr_key
97 context &c;
98 a_attr_key(context &c): c(c) { }
99 void operator()(It b, It e) const
101 Path p(xmlattr<Ch>());
102 p /= Str(b, e);
103 c.stack.back()->put_child(p, empty_ptree<Ptree>());
107 struct a_attr_data
109 context &c;
110 a_attr_data(context &c): c(c) { }
111 void operator()(It b, It e) const
113 Ptree &attr = c.stack.back()->get_child(xmlattr<Ch>());
114 attr.back().second.put_value(Str(b.base() + 1, e.base() - 1));
118 struct a_comment
120 context &c;
121 a_comment(context &c): c(c) { }
122 void operator()(It b, It e) const
124 c.stack.back()->push_back(std::make_pair(xmlcomment<Ch>(), Ptree(Str(b, e))));
130 ///////////////////////////////////////////////////////////////////////
131 // Grammar
// Spirit grammar describing an XML document for a given property tree type.
// The semantic actions attached to its rules (context_t::a_key_s, a_key_e,
// a_content, a_attr_key, a_attr_data, a_comment) all operate on the context
// member `c`.
133 template<class Ptree>
134 struct xml_grammar: public boost::spirit::grammar<xml_grammar<Ptree> >
137 typedef context<Ptree> context_t;
// Parser state handed to the semantic actions. It is mutable because the
// constructor of `definition` only sees the grammar as `const xml_grammar &`,
// while the actions are constructed from a non-const reference to `self.c`.
139 mutable context_t c;
// Rule definitions for one scanner type.
141 template<class ScannerT>
142 struct definition
145 typedef typename ScannerT::value_t char_t;
146 typedef boost::spirit::chset<char_t> chset_t;
// The productions of the grammar; their parsers are assigned in the
// constructor below.
148 boost::spirit::rule<ScannerT>
149 prolog, element, Misc, PEReference, Reference, PITarget, CData,
150 doctypedecl, XMLDecl, SDDecl, VersionInfo, EncodingDecl, VersionNum,
151 Eq, DeclSep, ExternalID, markupdecl, NotationDecl, EntityDecl,
152 AttlistDecl, elementdecl, TextDecl, extSubsetDecl, conditionalSect,
153 EmptyElemTag, STag, content, ETag, Attribute, contentspec, Mixed,
154 children, choice, seq, cp, AttDef, AttType, DefaultDecl, StringType,
155 TokenizedType, EnumeratedType, NotationType, Enumeration, EntityValue,
156 AttValue, SystemLiteral, PubidLiteral, CharDataChar, CharData, Comment,
157 PI, CDSect, extSubset, includeSect, ignoreSect, ignoreSectContents,
158 Ignore, CharRef, EntityRef, GEDecl, PEDecl, EntityDef, PEDef,
159 NDataDecl, extParsedEnt, EncName, PublicID, document, S, Name, Names,
160 Nmtoken, Nmtokens, STagB, STagE1, STagE2;
// Assigns a parser to each rule. `self` is the enclosing grammar; `self.c`
// is passed to every semantic action attached here.
162 definition(const xml_grammar &self)
165 using namespace boost::spirit;
167 // XML Char sets
// Note: the literals below only list values up to \x7F, plus \xB7 for Extender.
168 chset_t Char("\x9\xA\xD\x20-\x7F");
169 chset_t Sch("\x20\x9\xD\xA");
170 chset_t Letter("\x41-\x5A\x61-\x7A");
171 chset_t Digit("0-9");
172 chset_t XDigit("0-9A-Fa-f");
173 chset_t Extender("\xB7");
174 chset_t NameChar =
175 Letter
176 | Digit
177 | (char_t)'.'
178 | (char_t)'-'
179 | (char_t)'_'
180 | (char_t)':'
181 | Extender;
183 document =
184 prolog >> element >> *Misc
187 S =
188 +(Sch)
191 Name =
192 (Letter | '_' | ':')
193 >> *(NameChar)
196 Names =
197 Name >> *(S >> Name)
200 Nmtoken =
201 +NameChar
204 Nmtokens =
205 Nmtoken >> *(S >> Nmtoken)
208 EntityValue =
209 '"' >> *( (anychar_p - (chset_t(detail::widen<char_t>("%&\"").c_str())))
210 | PEReference
211 | Reference)
212 >> '"'
213 | '\'' >> *( (anychar_p - (chset_t("%&'")))
214 | PEReference
215 | Reference)
216 >> '\''
219 AttValue =
220 '"' >> *( (anychar_p - (chset_t("<&\"")))
221 | Reference)
222 >> '"'
223 | '\'' >> *( (anychar_p - (chset_t("<&'")))
224 | Reference)
225 >> '\''
228 SystemLiteral=
229 ('"' >> *(anychar_p - '"') >> '"')
230 | ('\'' >> *(anychar_p - '\'') >> '\'')
233 chset_t PubidChar("\x20\xD\xA'a-zA-Z0-9()+,./:=?;!*#@$_%-");
235 PubidLiteral =
236 '"' >> *PubidChar >> '"'
237 | '\'' >> *(PubidChar - '\'') >> '\''
// CharDataChar excludes only '<'; the variant that also excluded '&' is left
// commented out. The collected text is passed through decode_char_entities
// in context_t::a_content.
240 CharDataChar =
241 //anychar_p - (chset_t("<&"))
242 anychar_p - (chset_t("<"))
245 CharData =
246 *(CharDataChar - "]]>")
// The text between the comment delimiters is handed to a_comment.
249 Comment =
250 "<!--" >>
253 (Char - '-')
254 | ('-' >> (Char - '-'))
256 )[typename context_t::a_comment(self.c)]
257 >> "-->"
260 PI =
261 "<?" >> PITarget >> !(S >> (*(Char - "?>"))) >> "?>"
264 PITarget =
265 Name - (as_lower_d["xml"])
268 CDSect =
269 "<![CDATA[" >> CData >> "]]>"
272 CData =
273 *(Char - "]]>")
276 prolog =
277 !XMLDecl >> *Misc >> !(doctypedecl >> *Misc)
280 XMLDecl =
281 "<?xml" >> VersionInfo >> !EncodingDecl >> !SDDecl
282 >> !S >> "?>"
285 VersionInfo =
286 S >> "version" >> Eq >>
288 '\'' >> VersionNum >> '\''
289 | '"' >> VersionNum >> '"'
293 Eq =
294 !S >> '=' >> !S
297 chset_t VersionNumCh("a-zA-Z0-9_.:-");
299 VersionNum =
300 +(VersionNumCh)
303 Misc =
304 Comment
305 | PI
309 doctypedecl =
310 "<!DOCTYPE" >> S >> Name >> !(S >> ExternalID) >> !S >>
312 '[' >> *(markupdecl | DeclSep) >> ']' >> !S
314 >> '>'
317 DeclSep =
318 PEReference
322 markupdecl =
323 elementdecl
324 | AttlistDecl
325 | EntityDecl
326 | NotationDecl
327 | PI
328 | Comment
331 extSubset =
332 !TextDecl >> extSubsetDecl
335 extSubsetDecl =
337 markupdecl
338 | conditionalSect
339 | DeclSep
343 SDDecl =
344 S >> "standalone" >> Eq >>
346 ('\'' >> (str_p("yes") | "no") >> '\'')
347 | ('"' >> (str_p("yes") | "no") >> '"')
// NOTE(review): `element` is assigned twice in a row (here and directly
// below) - confirm whether this first form is meant to be commented out.
352 element =
353 EmptyElemTag
354 | STag >> content >> ETag
// a_key_e runs once the part after STagB has matched; it pops the node that
// a_key_s pushed while STagB matched the element name.
357 element =
358 STagB >> (STagE2 | (STagE1 >> content >> ETag))[typename context_t::a_key_e(self.c)]
361 STag =
362 '<' >> Name >> *(S >> Attribute) >> !S >> '>'
// Start tag split into pieces: a_key_s runs as soon as the element name is
// matched, ahead of the attribute list, and STagE1 / STagE2 match the two
// possible closers (">" and "/>").
365 STagB =
367 >> Name[typename context_t::a_key_s(self.c)]
368 >> *(S >> Attribute)
369 >> !S
372 STagE1 =
373 ch_p(">")
376 STagE2 =
377 str_p("/>")
// a_attr_key runs on the attribute name, a_attr_data on the matched AttValue
// (quotes included).
380 Attribute =
381 Name[typename context_t::a_attr_key(self.c)]
382 >> Eq
383 >> AttValue[typename context_t::a_attr_data(self.c)]
386 ETag =
387 "</" >> Name >> !S >> '>'
// Optional character data on either side of the nested constructs is passed
// to a_content; the Reference alternative is commented out.
390 content =
391 !(CharData[typename context_t::a_content(self.c)]) >>
394 element
395 // | Reference
396 | CDSect
397 | PI
398 | Comment
399 ) >>
400 !(CharData[typename context_t::a_content(self.c)])
404 EmptyElemTag =
405 '<' >> Name >> *(S >> Attribute) >> !S >> "/>"
408 elementdecl =
409 "<!ELEMENT" >> S >> Name >> S >> contentspec >> !S >> '>'
412 contentspec =
413 str_p("EMPTY")
414 | "ANY"
415 | Mixed
416 | children
419 children =
420 (choice | seq) >> !(ch_p('?') | '*' | '+')
423 cp =
424 (Name | choice | seq) >> !(ch_p('?') | '*' | '+')
427 choice =
428 '(' >> !S >> cp
429 >> +(!S >> '|' >> !S >> cp)
430 >> !S >> ')'
433 seq =
434 '(' >> !S >> cp >>
435 *(!S >> ',' >> !S >> cp)
436 >> !S >> ')'
439 Mixed =
440 '(' >> !S >> "#PCDATA"
441 >> *(!S >> '|' >> !S >> Name)
442 >> !S >> ")*"
443 | '(' >> !S >> "#PCDATA" >> !S >> ')'
446 AttlistDecl =
447 "<!ATTLIST" >> S >> Name >> *AttDef >> !S >> '>'
450 AttDef =
451 S >> Name >> S >> AttType >> S >> DefaultDecl
454 AttType =
455 StringType
456 | TokenizedType
457 | EnumeratedType
460 StringType =
461 str_p("CDATA")
464 TokenizedType =
465 longest_d[
466 str_p("ID")
467 | "IDREF"
468 | "IDREFS"
469 | "ENTITY"
470 | "ENTITIES"
471 | "NMTOKEN"
472 | "NMTOKENS"
476 EnumeratedType =
477 NotationType
478 | Enumeration
481 NotationType =
482 "NOTATION" >> S >> '(' >> !S >> Name
483 >> *(!S >> '|' >> !S >> Name)
484 >> !S >> ')'
487 Enumeration =
488 '(' >> !S >> Nmtoken
489 >> *(!S >> '|' >> !S >> Nmtoken)
490 >> !S >> ')'
493 DefaultDecl =
494 str_p("#REQUIRED")
495 | "#IMPLIED"
496 | !("#FIXED" >> S) >> AttValue
499 conditionalSect =
500 includeSect
501 | ignoreSect
504 includeSect =
505 "<![" >> !S >> "INCLUDE" >> !S
506 >> '[' >> extSubsetDecl >> "]]>"
509 ignoreSect =
510 "<![" >> !S >> "IGNORE" >> !S
511 >> '[' >> *ignoreSectContents >> "]]>"
514 ignoreSectContents =
515 Ignore >> *("<![" >> ignoreSectContents >> "]]>" >> Ignore)
518 Ignore =
519 *(Char - (str_p("<![") | "]]>"))
522 CharRef =
523 "&#" >> +Digit >> ';'
524 | "&#x" >> +XDigit >> ';'
527 Reference =
528 EntityRef
529 | CharRef
532 EntityRef =
533 '&' >> Name >> ';'
536 PEReference =
537 '%' >> Name >> ';'
540 EntityDecl =
541 GEDecl
542 | PEDecl
545 GEDecl =
546 "<!ENTITY" >> S >> Name >> S >> EntityDef >> !S >> '>'
549 PEDecl =
550 "<!ENTITY" >> S >> '%' >> S >> Name >> S >> PEDef
551 >> !S >> '>'
554 EntityDef =
555 EntityValue
556 | ExternalID >> !NDataDecl
559 PEDef =
560 EntityValue
561 | ExternalID
564 ExternalID =
565 "SYSTEM" >> S >> SystemLiteral
566 | "PUBLIC" >> S >> PubidLiteral >> S >> SystemLiteral
569 NDataDecl =
570 S >> "NDATA" >> S >> Name
573 TextDecl =
574 "<?xml" >> !VersionInfo >> EncodingDecl >> !S >> "?>"
577 extParsedEnt =
578 !TextDecl >> content
581 EncodingDecl =
582 S >> "encoding" >> Eq
583 >> ( '"' >> EncName >> '"'
584 | '\'' >> EncName >> '\''
588 EncName =
589 Letter >> *(Letter | Digit | '.' | '_' | '-')
592 NotationDecl =
593 "<!NOTATION" >> S >> Name >> S
594 >> (ExternalID | PublicID) >> !S >> '>'
597 PublicID =
598 "PUBLIC" >> S >> PubidLiteral
// Per-rule BOOST_SPIRIT_DEBUG_RULE invocations. The BOOST_SPIRIT_DEBUG define
// near the top of this file is commented out.
// NOTE(review): `document` is listed twice (first entry and again after PublicID).
601 BOOST_SPIRIT_DEBUG_RULE(document);
602 BOOST_SPIRIT_DEBUG_RULE(prolog);
603 BOOST_SPIRIT_DEBUG_RULE(element);
604 BOOST_SPIRIT_DEBUG_RULE(Misc);
605 BOOST_SPIRIT_DEBUG_RULE(PEReference);
606 BOOST_SPIRIT_DEBUG_RULE(Reference);
607 BOOST_SPIRIT_DEBUG_RULE(PITarget);
608 BOOST_SPIRIT_DEBUG_RULE(CData);
609 BOOST_SPIRIT_DEBUG_RULE(doctypedecl);
610 BOOST_SPIRIT_DEBUG_RULE(XMLDecl);
611 BOOST_SPIRIT_DEBUG_RULE(SDDecl);
612 BOOST_SPIRIT_DEBUG_RULE(VersionInfo);
613 BOOST_SPIRIT_DEBUG_RULE(EncodingDecl);
614 BOOST_SPIRIT_DEBUG_RULE(VersionNum);
615 BOOST_SPIRIT_DEBUG_RULE(Eq);
616 BOOST_SPIRIT_DEBUG_RULE(DeclSep);
617 BOOST_SPIRIT_DEBUG_RULE(ExternalID);
618 BOOST_SPIRIT_DEBUG_RULE(markupdecl);
619 BOOST_SPIRIT_DEBUG_RULE(NotationDecl);
620 BOOST_SPIRIT_DEBUG_RULE(EntityDecl);
621 BOOST_SPIRIT_DEBUG_RULE(AttlistDecl);
622 BOOST_SPIRIT_DEBUG_RULE(elementdecl);
623 BOOST_SPIRIT_DEBUG_RULE(TextDecl);
624 BOOST_SPIRIT_DEBUG_RULE(extSubsetDecl);
625 BOOST_SPIRIT_DEBUG_RULE(conditionalSect);
626 BOOST_SPIRIT_DEBUG_RULE(EmptyElemTag);
627 BOOST_SPIRIT_DEBUG_RULE(STag);
628 BOOST_SPIRIT_DEBUG_RULE(content);
629 BOOST_SPIRIT_DEBUG_RULE(ETag);
630 BOOST_SPIRIT_DEBUG_RULE(Attribute);
631 BOOST_SPIRIT_DEBUG_RULE(contentspec);
632 BOOST_SPIRIT_DEBUG_RULE(Mixed);
633 BOOST_SPIRIT_DEBUG_RULE(children);
634 BOOST_SPIRIT_DEBUG_RULE(choice);
635 BOOST_SPIRIT_DEBUG_RULE(seq);
636 BOOST_SPIRIT_DEBUG_RULE(cp);
637 BOOST_SPIRIT_DEBUG_RULE(AttDef);
638 BOOST_SPIRIT_DEBUG_RULE(AttType);
639 BOOST_SPIRIT_DEBUG_RULE(DefaultDecl);
640 BOOST_SPIRIT_DEBUG_RULE(StringType);
641 BOOST_SPIRIT_DEBUG_RULE(TokenizedType);
642 BOOST_SPIRIT_DEBUG_RULE(EnumeratedType);
643 BOOST_SPIRIT_DEBUG_RULE(NotationType);
644 BOOST_SPIRIT_DEBUG_RULE(Enumeration);
645 BOOST_SPIRIT_DEBUG_RULE(EntityValue);
646 BOOST_SPIRIT_DEBUG_RULE(AttValue);
647 BOOST_SPIRIT_DEBUG_RULE(SystemLiteral);
648 BOOST_SPIRIT_DEBUG_RULE(PubidLiteral);
649 BOOST_SPIRIT_DEBUG_RULE(CharDataChar);
650 BOOST_SPIRIT_DEBUG_RULE(CharData);
651 BOOST_SPIRIT_DEBUG_RULE(Comment);
652 BOOST_SPIRIT_DEBUG_RULE(PI);
653 BOOST_SPIRIT_DEBUG_RULE(CDSect);
654 BOOST_SPIRIT_DEBUG_RULE(extSubset);
655 BOOST_SPIRIT_DEBUG_RULE(includeSect);
656 BOOST_SPIRIT_DEBUG_RULE(ignoreSect);
657 BOOST_SPIRIT_DEBUG_RULE(ignoreSectContents);
658 BOOST_SPIRIT_DEBUG_RULE(Ignore);
659 BOOST_SPIRIT_DEBUG_RULE(CharRef);
660 BOOST_SPIRIT_DEBUG_RULE(EntityRef);
661 BOOST_SPIRIT_DEBUG_RULE(GEDecl);
662 BOOST_SPIRIT_DEBUG_RULE(PEDecl);
663 BOOST_SPIRIT_DEBUG_RULE(EntityDef);
664 BOOST_SPIRIT_DEBUG_RULE(PEDef);
665 BOOST_SPIRIT_DEBUG_RULE(NDataDecl);
666 BOOST_SPIRIT_DEBUG_RULE(extParsedEnt);
667 BOOST_SPIRIT_DEBUG_RULE(EncName);
668 BOOST_SPIRIT_DEBUG_RULE(PublicID);
669 BOOST_SPIRIT_DEBUG_RULE(document);
670 BOOST_SPIRIT_DEBUG_RULE(S);
671 BOOST_SPIRIT_DEBUG_RULE(Name);
672 BOOST_SPIRIT_DEBUG_RULE(Names);
673 BOOST_SPIRIT_DEBUG_RULE(Nmtoken);
674 BOOST_SPIRIT_DEBUG_RULE(Nmtokens);
675 BOOST_SPIRIT_DEBUG_RULE(STagB);
676 BOOST_SPIRIT_DEBUG_RULE(STagE1);
677 BOOST_SPIRIT_DEBUG_RULE(STagE2);
// Start rule of the grammar: returns `document`.
681 const boost::spirit::rule<ScannerT> &start() const
683 return document;
690 template<class Ptree>
691 void read_xml_internal(std::basic_istream<typename Ptree::key_type::value_type> &stream,
692 Ptree &pt,
693 int flags,
694 const std::string &filename)
697 typedef typename Ptree::key_type::value_type Ch;
698 typedef boost::spirit::position_iterator<typename std::vector<Ch>::const_iterator> It;
700 BOOST_ASSERT(validate_flags(flags));
702 // Load data into vector
703 std::vector<Ch> v(std::istreambuf_iterator<Ch>(stream.rdbuf()),
704 std::istreambuf_iterator<Ch>());
705 if (!stream.good())
706 BOOST_PROPERTY_TREE_THROW(xml_parser_error("read error", filename, 0));
708 // Initialize iterators
709 It begin(v.begin(), v.end());
710 It end(v.end(), v.end());;
712 begin.set_position(detail::widen<Ch>(filename.c_str()));
714 // Prepare grammar
715 Ptree local;
716 xml_grammar<Ptree> g;
717 g.c.stack.push_back(&local); // Push root ptree on context stack
718 g.c.flags = flags;
720 // Parse into local
721 boost::spirit::parse_info<It> result = boost::spirit::parse(begin, end, g);
722 if (!result.full || g.c.stack.size() != 1)
723 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
724 detail::narrow(result.stop.get_position().file.c_str()),
725 result.stop.get_position().line));
727 // Swap local and pt
728 pt.swap(local);
731 } } }
733 #endif