2 Copyright 2013 Karel Matas
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * Parsers for JMdict and kanjidic2.
29 #include "3rdparty/rapidxml.hpp"
31 #include "datatypes.hxx"
33 using aoi::SEPARATOR_SQL
;
34 using aoi::ElementKanji
;
35 using aoi::ElementReading
;
36 using aoi::ElementSense
;
42 using rapidxml::xml_node
;
43 using rapidxml::xml_document
;
/*!
 * Base XML parser class. Loads an XML file and builds the XML tree. Must be subclassed.
 * \todo Parser should escape the aoi::SEPARATOR_SQL character right after loading
 * the file into memory (i.e. before parsing).
 */
63 * Get values of all the elements of the type <i>element</i> in <i>node</i>.
64 * For example XML code:
68 <phone>1232456789</phone>
69 <phone>987654321</phone>
72 * get_elements( node, phone ) returns "{ "123456789", "987654321" }"
73 * \param parent parent node
74 * \param element what element to get
75 * \param unreference if true: remove '&' and ';' from the string borders
76 * \return values of all the elements <i>element</i> or empty vector
78 static vector
<string
> get_elements ( xml_node
<> *parent
, const char *element
,
79 bool unreference
=false );
82 BaseParser ( const char *filename
);
83 virtual ~BaseParser ();
86 * Scans first node of the document for the entities (<!ENTITY).
87 * \return map in format entity_name:entity_description
89 map
<string
,string
> get_entities ();
93 //! Parser for JMDict_e XML file.
94 class JmdictParser
: public BaseParser
102 xml_node
<> *entry_
= nullptr;
105 JmdictParser( const char *filename
) : BaseParser(filename
)
106 { entry_
= doc_
.first_node("JMdict")->first_node("entry"); };
110 * Gets one entry from JMdict. Caller should call this function until
111 * Dicword.did() != -1
113 JmdictParser p("file.xml");
114 DicWord w = p.get_entry();
115 while ( w.did() != -1 ){
116 printf("Word ID: %d\n", w.did());
120 * \return DicWord on succes, empty DicWord (did()=-1) otherwise
122 DicWord
get_entry ();
124 //! Returns JMDict version.
125 string
get_version ();
130 //! Parser for kanjidic2 XML file.
131 class KanjidicParser
: public BaseParser
135 xml_node
<> *entry_
= nullptr;
138 KanjidicParser( const char *filename
): BaseParser(filename
)
139 { entry_
= doc_
.first_node("kanjidic2")->first_node("character"); };
/*!
 * Gets one entry from kanjidic2. The caller should keep calling this function
 * until the returned Kanji has an empty kanji().
 * \code
 * KanjidicParser p("file.xml");
 * Kanji k = p.get_entry();
 * while ( !k.kanji().empty() ){
 *     printf("Kanji: %s\n", k.kanji().c_str());
 *     k = p.get_entry();
 * }
 * \endcode
 * \return Kanji on success, empty Kanji (kanji()=="") otherwise
 */
157 //! Returns kanjidic2 version in format: "version (date)"
158 string
get_version ();
162 } // namespace parsers
163 #endif // _PARSERS_HXX