1 // ----------------------------------------------------------------------------
2 // Copyright (C) 2002-2006 Marcin Kalicinski
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
8 // Based on XML grammar by Daniel C. Nuffer
9 // http://spirit.sourceforge.net/repository/applications/xml.zip
11 // For more information, see www.boost.org
12 // ----------------------------------------------------------------------------
13 #ifndef BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED
14 #define BOOST_PROPERTY_TREE_DETAIL_XML_PARSER_READ_SPIRIT_HPP_INCLUDED
16 //#define BOOST_SPIRIT_DEBUG
18 #include <boost/property_tree/ptree.hpp>
19 #include <boost/property_tree/detail/xml_parser_error.hpp>
20 #include <boost/property_tree/detail/xml_parser_flags.hpp>
21 #include <boost/property_tree/detail/xml_parser_utils.hpp>
22 #include <boost/spirit.hpp>
23 #include <boost/spirit/iterator/position_iterator.hpp>
29 namespace boost
{ namespace property_tree
{ namespace xml_parser
// Type aliases derived from the Ptree template parameter, followed by the
// stack of tree-node pointers that the semantic actions below operate on.
// NOTE(review): the enclosing declaration is not visible in this listing;
// presumably these are members of the parser `context` -- confirm.
//
// Ch: character type of the tree's key strings.
37 typedef typename
Ptree::key_type::value_type Ch
;
// Str: std::basic_string over Ch.
38 typedef std::basic_string
<Ch
> Str
;
// Path: the tree's path type.
39 typedef typename
Ptree::path_type Path
;
// It: Spirit position_iterator wrapping a const iterator into a vector of Ch;
// the actions below query it with get_position() for file and line.
40 typedef boost::spirit::position_iterator
<typename
std::vector
<Ch
>::const_iterator
> It
;
// Stack of pointers to tree nodes; the actions below use back() as the node
// they add children and values to.
43 std::vector
<Ptree
*> stack
;
45 ///////////////////////////////////////////////////////////////////////
// a_key_s: semantic action that opens a new child node.
// The constructor only stores a reference to the shared parser context.
51 a_key_s(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
52 void operator()(It b
, It e
) const
// Reports "xml parse error" with the file name and line taken from b's position.
// NOTE(review): the condition guarding this throw is not visible in this
// listing -- confirm against the full source.
55 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
56 detail::narrow(b
.get_position().file
.c_str()),
57 b
.get_position().line
));
// Append an empty Ptree keyed by `name` to the node on top of the stack and
// keep a pointer to the newly inserted child.
// NOTE(review): the definition of `name` is not visible here; presumably it is
// built from [b, e) -- confirm.
59 Ptree
*child
= &c
.stack
.back()->push_back(std::make_pair(name
, Ptree()))->second
;
// The new child becomes the top of the stack.
60 c
.stack
.push_back(child
);
// a_key_e: semantic action attached to the end of an element.
// The constructor only stores a reference to the shared parser context.
67 a_key_e(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
68 void operator()(It b
, It e
) const
// A stack of size 1 or less is reported as "xml parse error", with the file
// name and line taken from b's position.
// NOTE(review): whatever follows this check (presumably popping the stack) is
// not visible in this listing -- confirm.
70 if (c
.stack
.size() <= 1)
71 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
72 detail::narrow(b
.get_position().file
.c_str()),
73 b
.get_position().line
));
// a_content: semantic action that stores character data in the current node.
// The constructor only stores a reference to the shared parser context.
81 a_content(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
82 void operator()(It b
, It e
) const
// The matched text is passed through condense, detail::trim and
// decode_char_entities, in that order.
84 Str s
= decode_char_entities(detail::trim(condense(Str(b
, e
))));
// With the no_concat_text flag set, the text is added as a separate child
// keyed by xmltext<Ch>().
87 if (c
.flags
& no_concat_text
)
88 c
.stack
.back()->push_back(std::make_pair(xmltext
<Ch
>(), Ptree(s
)));
// Appends s to the string value already held by the node on top of the stack.
// NOTE(review): the `else` joining this to the `if` above is not visible in
// this listing -- confirm.
90 c
.stack
.back()->put_value(c
.stack
.back()->template get_value
<std::basic_string
<Ch
> >() + s
);
// a_attr_key: semantic action attached to an attribute name.
// The constructor only stores a reference to the shared parser context.
98 a_attr_key(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
99 void operator()(It b
, It e
) const
// Path starting at the xmlattr<Ch>() key.
// NOTE(review): a step that extends p with the attribute name is not visible
// in this listing; presumably it sits between these two statements -- confirm.
101 Path
p(xmlattr
<Ch
>());
// Create an empty child at path p under the node on top of the stack.
103 c
.stack
.back()->put_child(p
, empty_ptree
<Ptree
>());
// a_attr_data: semantic action attached to an attribute value.
// The constructor only stores a reference to the shared parser context.
110 a_attr_data(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
111 void operator()(It b
, It e
) const
// Look up the xmlattr<Ch>() child of the node on top of the stack.
113 Ptree
&attr
= c
.stack
.back()->get_child(xmlattr
<Ch
>());
// Set the value of the last entry in that child, dropping the first and last
// character of the match (presumably the surrounding quotes -- confirm against
// the AttValue rule).
114 attr
.back().second
.put_value(Str(b
.base() + 1, e
.base() - 1));
// a_comment: semantic action that stores a comment in the current node.
// The constructor only stores a reference to the shared parser context.
121 a_comment(context
&c
): c(c
) { }
// Called with the matched input range [b, e).
122 void operator()(It b
, It e
) const
// Add the matched text as a child keyed by xmlcomment<Ch>() to the node on
// top of the stack.
124 c
.stack
.back()->push_back(std::make_pair(xmlcomment
<Ch
>(), Ptree(Str(b
, e
))));
130 ///////////////////////////////////////////////////////////////////////
// xml_grammar: Spirit grammar for XML, parameterised on the property tree type.
// It derives from boost::spirit::grammar instantiated with xml_grammar<Ptree>
// itself.
133 template<class Ptree
>
134 struct xml_grammar
: public boost::spirit::grammar
<xml_grammar
<Ptree
> >
// context_t: the context instantiated for this Ptree; the rule definitions
// below name its nested semantic actions as context_t::a_*.
137 typedef context
<Ptree
> context_t
;
// Scanner-dependent part of the grammar: character typedefs and the rule
// objects themselves.
// NOTE(review): the declaration line of the enclosing struct is not visible in
// this listing.
141 template<class ScannerT
>
// char_t: the scanner's value type; chset_t: Spirit character set over char_t.
145 typedef typename
ScannerT::value_t char_t
;
146 typedef boost::spirit::chset
<char_t
> chset_t
;
// One rule object per grammar production; each is also passed to
// BOOST_SPIRIT_DEBUG_RULE in the constructor.
148 boost::spirit::rule
<ScannerT
>
149 prolog
, element
, Misc
, PEReference
, Reference
, PITarget
, CData
,
150 doctypedecl
, XMLDecl
, SDDecl
, VersionInfo
, EncodingDecl
, VersionNum
,
151 Eq
, DeclSep
, ExternalID
, markupdecl
, NotationDecl
, EntityDecl
,
152 AttlistDecl
, elementdecl
, TextDecl
, extSubsetDecl
, conditionalSect
,
153 EmptyElemTag
, STag
, content
, ETag
, Attribute
, contentspec
, Mixed
,
154 children
, choice
, seq
, cp
, AttDef
, AttType
, DefaultDecl
, StringType
,
155 TokenizedType
, EnumeratedType
, NotationType
, Enumeration
, EntityValue
,
156 AttValue
, SystemLiteral
, PubidLiteral
, CharDataChar
, CharData
, Comment
,
157 PI
, CDSect
, extSubset
, includeSect
, ignoreSect
, ignoreSectContents
,
158 Ignore
, CharRef
, EntityRef
, GEDecl
, PEDecl
, EntityDef
, PEDef
,
159 NDataDecl
, extParsedEnt
, EncName
, PublicID
, document
, S
, Name
, Names
,
160 Nmtoken
, Nmtokens
, STagB
, STagE1
, STagE2
;
// Constructor: builds the parser expressions for the rules declared above and
// attaches the context's semantic actions (via self.c) to some of them.
// NOTE(review): only fragments of the rule expressions survive in this listing;
// most assignment left-hand sides (`Rule = ...`) and closing tokens are not
// visible, so the notes below describe only what the visible lines show.
162 definition(const xml_grammar
&self
)
165 using namespace boost::spirit
;
// Character sets used by the expressions below.
// Char: tab, LF, CR and the range 0x20-0x7F.
168 chset_t
Char("\x9\xA\xD\x20-\x7F");
// Sch: space, tab, CR, LF.
169 chset_t
Sch("\x20\x9\xD\xA");
// Letter: 0x41-0x5A and 0x61-0x7A (A-Z, a-z).
170 chset_t
Letter("\x41-\x5A\x61-\x7A");
171 chset_t
Digit("0-9");
172 chset_t
XDigit("0-9A-Fa-f");
173 chset_t
Extender("\xB7");
184 prolog
>> element
>> *Misc
205 Nmtoken
>> *(S
>> Nmtoken
)
// Quoted literals: any character except the listed ones, in double or single
// quotes. Only the first alternative widens its character list via
// detail::widen<char_t>; the others pass a narrow literal to chset_t directly.
209 '"' >> *( (anychar_p
- (chset_t(detail::widen
<char_t
>("%&\"").c_str())))
213 | '\'' >> *( (anychar_p
- (chset_t("%&'")))
220 '"' >> *( (anychar_p
- (chset_t("<&\"")))
223 | '\'' >> *( (anychar_p
- (chset_t("<&'")))
229 ('"' >> *(anychar_p
- '"') >> '"')
230 | ('\'' >> *(anychar_p
- '\'') >> '\'')
233 chset_t
PubidChar("\x20\xD\xA'a-zA-Z0-9()+,./:=?;!*#@$_%-");
236 '"' >> *PubidChar
>> '"'
237 | '\'' >> *(PubidChar
- '\'') >> '\''
// The active expression excludes only '<'; the variant that also excluded '&'
// is kept commented out on the line below.
241 //anychar_p - (chset_t("<&"))
242 anychar_p
- (chset_t("<"))
246 *(CharDataChar
- "]]>")
254 | ('-' >> (Char
- '-'))
// a_comment receives the text matched by the enclosing parenthesised expression.
256 )[typename
context_t::a_comment(self
.c
)]
261 "<?" >> PITarget
>> !(S
>> (*(Char
- "?>"))) >> "?>"
// A Name that is not "xml" in any letter case (as_lower_d).
265 Name
- (as_lower_d
["xml"])
269 "<![CDATA[" >> CData
>> "]]>"
277 !XMLDecl
>> *Misc
>> !(doctypedecl
>> *Misc
)
281 "<?xml" >> VersionInfo
>> !EncodingDecl
>> !SDDecl
286 S
>> "version" >> Eq
>>
288 '\'' >> VersionNum
>> '\''
289 | '"' >> VersionNum
>> '"'
297 chset_t
VersionNumCh("a-zA-Z0-9_.:-");
310 "<!DOCTYPE" >> S
>> Name
>> !(S
>> ExternalID
) >> !S
>>
312 '[' >> *(markupdecl
| DeclSep
) >> ']' >> !S
332 !TextDecl
>> extSubsetDecl
344 S
>> "standalone" >> Eq
>>
346 ('\'' >> (str_p("yes") | "no") >> '\'')
347 | ('"' >> (str_p("yes") | "no") >> '"')
354 | STag >> content >> ETag
// a_key_e is attached to the parenthesised alternative: it fires after either
// STagE2 alone or the STagE1 / content / ETag sequence has matched.
358 STagB
>> (STagE2
| (STagE1
>> content
>> ETag
))[typename
context_t::a_key_e(self
.c
)]
362 '<' >> Name
>> *(S
>> Attribute
) >> !S
>> '>'
// a_key_s fires on the matched Name.
367 >> Name
[typename
context_t::a_key_s(self
.c
)]
// a_attr_key fires on the attribute's Name, a_attr_data on its AttValue.
381 Name
[typename
context_t::a_attr_key(self
.c
)]
383 >> AttValue
[typename
context_t::a_attr_data(self
.c
)]
387 "</" >> Name
>> !S
>> '>'
// Optional character data is handed to a_content.
391 !(CharData
[typename
context_t::a_content(self
.c
)]) >>
400 !(CharData
[typename
context_t::a_content(self
.c
)])
405 '<' >> Name
>> *(S
>> Attribute
) >> !S
>> "/>"
409 "<!ELEMENT" >> S
>> Name
>> S
>> contentspec
>> !S
>> '>'
420 (choice
| seq
) >> !(ch_p('?') | '*' | '+')
424 (Name
| choice
| seq
) >> !(ch_p('?') | '*' | '+')
429 >> +(!S
>> '|' >> !S
>> cp
)
435 *(!S
>> ',' >> !S
>> cp
)
440 '(' >> !S
>> "#PCDATA"
441 >> *(!S
>> '|' >> !S
>> Name
)
443 | '(' >> !S
>> "#PCDATA" >> !S
>> ')'
447 "<!ATTLIST" >> S
>> Name
>> *AttDef
>> !S
>> '>'
451 S
>> Name
>> S
>> AttType
>> S
>> DefaultDecl
482 "NOTATION" >> S
>> '(' >> !S
>> Name
483 >> *(!S
>> '|' >> !S
>> Name
)
489 >> *(!S
>> '|' >> !S
>> Nmtoken
)
496 | !("#FIXED" >> S
) >> AttValue
505 "<![" >> !S
>> "INCLUDE" >> !S
506 >> '[' >> extSubsetDecl
>> "]]>"
510 "<![" >> !S
>> "IGNORE" >> !S
511 >> '[' >> *ignoreSectContents
>> "]]>"
515 Ignore
>> *("<![" >> ignoreSectContents
>> "]]>" >> Ignore
)
519 *(Char
- (str_p("<![") | "]]>"))
// Character references: decimal ("&#" digits ';') or hexadecimal ("&#x" hex
// digits ';').
523 "&#" >> +Digit
>> ';'
524 | "&#x" >> +XDigit
>> ';'
546 "<!ENTITY" >> S
>> Name
>> S
>> EntityDef
>> !S
>> '>'
550 "<!ENTITY" >> S
>> '%' >> S
>> Name
>> S
>> PEDef
556 | ExternalID
>> !NDataDecl
565 "SYSTEM" >> S
>> SystemLiteral
566 | "PUBLIC" >> S
>> PubidLiteral
>> S
>> SystemLiteral
570 S
>> "NDATA" >> S
>> Name
574 "<?xml" >> !VersionInfo
>> EncodingDecl
>> !S
>> "?>"
582 S
>> "encoding" >> Eq
583 >> ( '"' >> EncName
>> '"'
584 | '\'' >> EncName
>> '\''
589 Letter
>> *(Letter
| Digit
| '.' | '_' | '-')
593 "<!NOTATION" >> S
>> Name
>> S
594 >> (ExternalID
| PublicID
) >> !S
>> '>'
598 "PUBLIC" >> S
>> PubidLiteral
// Every rule is passed to BOOST_SPIRIT_DEBUG_RULE. The define of
// BOOST_SPIRIT_DEBUG near the top of this file is commented out.
// NOTE(review): `document` is passed twice (here and again near the end of this
// list); presumably harmless, confirm whether one of them can be dropped.
601 BOOST_SPIRIT_DEBUG_RULE(document
);
602 BOOST_SPIRIT_DEBUG_RULE(prolog
);
603 BOOST_SPIRIT_DEBUG_RULE(element
);
604 BOOST_SPIRIT_DEBUG_RULE(Misc
);
605 BOOST_SPIRIT_DEBUG_RULE(PEReference
);
606 BOOST_SPIRIT_DEBUG_RULE(Reference
);
607 BOOST_SPIRIT_DEBUG_RULE(PITarget
);
608 BOOST_SPIRIT_DEBUG_RULE(CData
);
609 BOOST_SPIRIT_DEBUG_RULE(doctypedecl
);
610 BOOST_SPIRIT_DEBUG_RULE(XMLDecl
);
611 BOOST_SPIRIT_DEBUG_RULE(SDDecl
);
612 BOOST_SPIRIT_DEBUG_RULE(VersionInfo
);
613 BOOST_SPIRIT_DEBUG_RULE(EncodingDecl
);
614 BOOST_SPIRIT_DEBUG_RULE(VersionNum
);
615 BOOST_SPIRIT_DEBUG_RULE(Eq
);
616 BOOST_SPIRIT_DEBUG_RULE(DeclSep
);
617 BOOST_SPIRIT_DEBUG_RULE(ExternalID
);
618 BOOST_SPIRIT_DEBUG_RULE(markupdecl
);
619 BOOST_SPIRIT_DEBUG_RULE(NotationDecl
);
620 BOOST_SPIRIT_DEBUG_RULE(EntityDecl
);
621 BOOST_SPIRIT_DEBUG_RULE(AttlistDecl
);
622 BOOST_SPIRIT_DEBUG_RULE(elementdecl
);
623 BOOST_SPIRIT_DEBUG_RULE(TextDecl
);
624 BOOST_SPIRIT_DEBUG_RULE(extSubsetDecl
);
625 BOOST_SPIRIT_DEBUG_RULE(conditionalSect
);
626 BOOST_SPIRIT_DEBUG_RULE(EmptyElemTag
);
627 BOOST_SPIRIT_DEBUG_RULE(STag
);
628 BOOST_SPIRIT_DEBUG_RULE(content
);
629 BOOST_SPIRIT_DEBUG_RULE(ETag
);
630 BOOST_SPIRIT_DEBUG_RULE(Attribute
);
631 BOOST_SPIRIT_DEBUG_RULE(contentspec
);
632 BOOST_SPIRIT_DEBUG_RULE(Mixed
);
633 BOOST_SPIRIT_DEBUG_RULE(children
);
634 BOOST_SPIRIT_DEBUG_RULE(choice
);
635 BOOST_SPIRIT_DEBUG_RULE(seq
);
636 BOOST_SPIRIT_DEBUG_RULE(cp
);
637 BOOST_SPIRIT_DEBUG_RULE(AttDef
);
638 BOOST_SPIRIT_DEBUG_RULE(AttType
);
639 BOOST_SPIRIT_DEBUG_RULE(DefaultDecl
);
640 BOOST_SPIRIT_DEBUG_RULE(StringType
);
641 BOOST_SPIRIT_DEBUG_RULE(TokenizedType
);
642 BOOST_SPIRIT_DEBUG_RULE(EnumeratedType
);
643 BOOST_SPIRIT_DEBUG_RULE(NotationType
);
644 BOOST_SPIRIT_DEBUG_RULE(Enumeration
);
645 BOOST_SPIRIT_DEBUG_RULE(EntityValue
);
646 BOOST_SPIRIT_DEBUG_RULE(AttValue
);
647 BOOST_SPIRIT_DEBUG_RULE(SystemLiteral
);
648 BOOST_SPIRIT_DEBUG_RULE(PubidLiteral
);
649 BOOST_SPIRIT_DEBUG_RULE(CharDataChar
);
650 BOOST_SPIRIT_DEBUG_RULE(CharData
);
651 BOOST_SPIRIT_DEBUG_RULE(Comment
);
652 BOOST_SPIRIT_DEBUG_RULE(PI
);
653 BOOST_SPIRIT_DEBUG_RULE(CDSect
);
654 BOOST_SPIRIT_DEBUG_RULE(extSubset
);
655 BOOST_SPIRIT_DEBUG_RULE(includeSect
);
656 BOOST_SPIRIT_DEBUG_RULE(ignoreSect
);
657 BOOST_SPIRIT_DEBUG_RULE(ignoreSectContents
);
658 BOOST_SPIRIT_DEBUG_RULE(Ignore
);
659 BOOST_SPIRIT_DEBUG_RULE(CharRef
);
660 BOOST_SPIRIT_DEBUG_RULE(EntityRef
);
661 BOOST_SPIRIT_DEBUG_RULE(GEDecl
);
662 BOOST_SPIRIT_DEBUG_RULE(PEDecl
);
663 BOOST_SPIRIT_DEBUG_RULE(EntityDef
);
664 BOOST_SPIRIT_DEBUG_RULE(PEDef
);
665 BOOST_SPIRIT_DEBUG_RULE(NDataDecl
);
666 BOOST_SPIRIT_DEBUG_RULE(extParsedEnt
);
667 BOOST_SPIRIT_DEBUG_RULE(EncName
);
668 BOOST_SPIRIT_DEBUG_RULE(PublicID
);
669 BOOST_SPIRIT_DEBUG_RULE(document
);
670 BOOST_SPIRIT_DEBUG_RULE(S
);
671 BOOST_SPIRIT_DEBUG_RULE(Name
);
672 BOOST_SPIRIT_DEBUG_RULE(Names
);
673 BOOST_SPIRIT_DEBUG_RULE(Nmtoken
);
674 BOOST_SPIRIT_DEBUG_RULE(Nmtokens
);
675 BOOST_SPIRIT_DEBUG_RULE(STagB
);
676 BOOST_SPIRIT_DEBUG_RULE(STagE1
);
677 BOOST_SPIRIT_DEBUG_RULE(STagE2
);
// start(): const accessor returning a reference to one of the rules above.
// NOTE(review): the body is not visible in this listing; presumably it returns
// `document` -- confirm.
681 const boost::spirit::rule
<ScannerT
> &start() const
// read_xml_internal: reads the whole stream into memory, parses it with
// xml_grammar<Ptree>, and reports failures as xml_parser_error.
// NOTE(review): part of the parameter list (the lines between `stream` and
// `filename`), the braces, and the declaration of `local` are not visible in
// this listing; `flags` is used below and is presumably one of the missing
// parameters -- confirm.
690 template<class Ptree
>
691 void read_xml_internal(std::basic_istream
<typename
Ptree::key_type::value_type
> &stream
,
694 const std::string
&filename
)
// Same Ch / It aliases as used by the semantic actions.
697 typedef typename
Ptree::key_type::value_type Ch
;
698 typedef boost::spirit::position_iterator
<typename
std::vector
<Ch
>::const_iterator
> It
;
// Flag validity is checked by assertion only.
700 BOOST_ASSERT(validate_flags(flags
));
702 // Load data into vector
703 std::vector
<Ch
> v(std::istreambuf_iterator
<Ch
>(stream
.rdbuf()),
704 std::istreambuf_iterator
<Ch
>());
// Reports "read error" for `filename` with line 0.
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably a stream-state check -- confirm.
706 BOOST_PROPERTY_TREE_THROW(xml_parser_error("read error", filename
, 0));
708 // Initialize iterators
709 It
begin(v
.begin(), v
.end());
// NOTE(review): the statement below ends with a stray second ';' (an empty
// statement).
710 It
end(v
.end(), v
.end());;
// Seed the iterator's position with the (widened) file name; the error paths
// read it back through get_position().file.
712 begin
.set_position(detail::widen
<Ch
>(filename
.c_str()));
716 xml_grammar
<Ptree
> g
;
717 g
.c
.stack
.push_back(&local
); // Push root ptree on context stack
// Run the parser. Anything other than a full match that leaves exactly one
// entry on the stack is reported as "xml parse error", with the file name and
// line taken from the position where parsing stopped.
721 boost::spirit::parse_info
<It
> result
= boost::spirit::parse(begin
, end
, g
);
722 if (!result
.full
|| g
.c
.stack
.size() != 1)
723 BOOST_PROPERTY_TREE_THROW(xml_parser_error("xml parse error",
724 detail::narrow(result
.stop
.get_position().file
.c_str()),
725 result
.stop
.get_position().line
));