/*
 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
22 #include <ZLFileImage.h>
23 #include <ZLUnicodeUtil.h>
25 #include "XHTMLReader.h"
26 #include "../util/EntityFilesCollector.h"
27 #include "../util/MiscUtil.h"
29 #include "../../bookmodel/BookReader.h"
30 #include "../../bookmodel/BookModel.h"
32 std::map
<std::string
,XHTMLTagAction
*> XHTMLReader::ourTagActions
;
34 XHTMLTagAction::~XHTMLTagAction() {
37 class XHTMLTagParagraphAction
: public XHTMLTagAction
{
40 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
41 void doAtEnd(XHTMLReader
&reader
);
44 class XHTMLTagRestartParagraphAction
: public XHTMLTagAction
{
47 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
48 void doAtEnd(XHTMLReader
&reader
);
51 class XHTMLTagImageAction
: public XHTMLTagAction
{
54 XHTMLTagImageAction(const std::string
&nameAttribute
);
56 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
57 void doAtEnd(XHTMLReader
&reader
);
60 const std::string myNameAttribute
;
63 class XHTMLTagItemAction
: public XHTMLTagAction
{
66 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
67 void doAtEnd(XHTMLReader
&reader
);
70 class XHTMLTagHyperlinkAction
: public XHTMLTagAction
{
73 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
74 void doAtEnd(XHTMLReader
&reader
);
77 std::stack
<FBTextKind
> myHyperlinkStack
;
80 class XHTMLTagControlAction
: public XHTMLTagAction
{
83 XHTMLTagControlAction(FBTextKind control
);
85 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
86 void doAtEnd(XHTMLReader
&reader
);
92 class XHTMLTagParagraphWithControlAction
: public XHTMLTagAction
{
95 XHTMLTagParagraphWithControlAction(FBTextKind control
);
97 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
98 void doAtEnd(XHTMLReader
&reader
);
101 FBTextKind myControl
;
104 class XHTMLTagPreAction
: public XHTMLTagAction
{
107 void doAtStart(XHTMLReader
&reader
, const char **xmlattributes
);
108 void doAtEnd(XHTMLReader
&reader
);
111 void XHTMLTagParagraphAction::doAtStart(XHTMLReader
&reader
, const char**) {
112 reader
.myModelReader
.beginParagraph();
115 void XHTMLTagParagraphAction::doAtEnd(XHTMLReader
&reader
) {
116 reader
.myModelReader
.endParagraph();
119 void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader
&reader
, const char**) {
120 reader
.myModelReader
.endParagraph();
121 reader
.myModelReader
.beginParagraph();
124 void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader
&) {
127 void XHTMLTagItemAction::doAtStart(XHTMLReader
&reader
, const char**) {
128 reader
.myModelReader
.endParagraph();
129 // TODO: increase left indent
130 reader
.myModelReader
.beginParagraph();
131 // TODO: replace bullet sign by number inside OL tag
132 const std::string bullet
= "\xE2\x80\xA2\xC0\xA0";
133 reader
.myModelReader
.addData(bullet
);
136 void XHTMLTagItemAction::doAtEnd(XHTMLReader
&reader
) {
137 reader
.myModelReader
.endParagraph();
140 XHTMLTagImageAction::XHTMLTagImageAction(const std::string
&nameAttribute
) : myNameAttribute(nameAttribute
) {
143 void XHTMLTagImageAction::doAtStart(XHTMLReader
&reader
, const char **xmlattributes
) {
144 const char *fileName
= reader
.attributeValue(xmlattributes
, myNameAttribute
.c_str());
146 bool flag
= reader
.myModelReader
.paragraphIsOpen();
148 reader
.myModelReader
.endParagraph();
150 if ((strlen(fileName
) > 2) && strncmp(fileName
, "./", 2) == 0) {
153 const std::string fullfileName
= reader
.myPathPrefix
+ fileName
;
154 reader
.myModelReader
.addImageReference(fullfileName
);
155 reader
.myModelReader
.addImage(fullfileName
, new ZLFileImage("image/auto", fullfileName
, 0));
157 reader
.myModelReader
.beginParagraph();
162 void XHTMLTagImageAction::doAtEnd(XHTMLReader
&) {
165 XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control
) : myControl(control
) {
168 void XHTMLTagControlAction::doAtStart(XHTMLReader
&reader
, const char**) {
169 reader
.myModelReader
.pushKind(myControl
);
170 reader
.myModelReader
.addControl(myControl
, true);
173 void XHTMLTagControlAction::doAtEnd(XHTMLReader
&reader
) {
174 reader
.myModelReader
.addControl(myControl
, false);
175 reader
.myModelReader
.popKind();
178 void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader
&reader
, const char **xmlattributes
) {
179 const char *href
= reader
.attributeValue(xmlattributes
, "href");
181 const std::string link
= (*href
== '#') ? (reader
.myReferenceName
+ href
) : href
;
182 FBTextKind hyperlinkType
= MiscUtil::isReference(link
) ? EXTERNAL_HYPERLINK
: INTERNAL_HYPERLINK
;
183 myHyperlinkStack
.push(hyperlinkType
);
184 reader
.myModelReader
.addHyperlinkControl(hyperlinkType
, link
);
186 myHyperlinkStack
.push(REGULAR
);
188 const char *name
= reader
.attributeValue(xmlattributes
, "name");
190 reader
.myModelReader
.addHyperlinkLabel(reader
.myReferenceName
+ "#" + name
);
194 void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader
&reader
) {
195 FBTextKind kind
= myHyperlinkStack
.top();
196 if (kind
!= REGULAR
) {
197 reader
.myModelReader
.addControl(kind
, false);
199 myHyperlinkStack
.pop();
202 XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control
) : myControl(control
) {
205 void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader
&reader
, const char**) {
206 if ((myControl
== TITLE
) && (reader
.myModelReader
.model().bookTextModel()->paragraphsNumber() > 1)) {
207 reader
.myModelReader
.insertEndOfSectionParagraph();
209 reader
.myModelReader
.pushKind(myControl
);
210 reader
.myModelReader
.beginParagraph();
213 void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader
&reader
) {
214 reader
.myModelReader
.endParagraph();
215 reader
.myModelReader
.popKind();
218 void XHTMLTagPreAction::doAtStart(XHTMLReader
&reader
, const char**) {
219 reader
.myPreformatted
= true;
220 reader
.myModelReader
.beginParagraph();
221 reader
.myModelReader
.addControl(CODE
, true);
224 void XHTMLTagPreAction::doAtEnd(XHTMLReader
&reader
) {
225 reader
.myModelReader
.addControl(CODE
, false);
226 reader
.myModelReader
.endParagraph();
227 reader
.myPreformatted
= false;
230 void XHTMLReader::addAction(const std::string
&tag
, XHTMLTagAction
*action
) {
231 ourTagActions
.insert(std::pair
<std::string
,XHTMLTagAction
*>(tag
,action
));
234 void XHTMLReader::fillTagTable() {
235 if (ourTagActions
.empty()) {
236 //addAction("html", new XHTMLTagAction());
237 addAction("body", new XHTMLTagParagraphAction());
238 //addAction("title", new XHTMLTagAction());
239 //addAction("meta", new XHTMLTagAction());
240 //addAction("script", new XHTMLTagAction());
242 //addAction("font", new XHTMLTagAction());
243 //addAction("style", new XHTMLTagAction());
245 addAction("p", new XHTMLTagParagraphAction());
246 addAction("h1", new XHTMLTagParagraphWithControlAction(H1
));
247 addAction("h2", new XHTMLTagParagraphWithControlAction(H2
));
248 addAction("h3", new XHTMLTagParagraphWithControlAction(H3
));
249 addAction("h4", new XHTMLTagParagraphWithControlAction(H4
));
250 addAction("h5", new XHTMLTagParagraphWithControlAction(H5
));
251 addAction("h6", new XHTMLTagParagraphWithControlAction(H6
));
253 //addAction("ol", new XHTMLTagAction());
254 //addAction("ul", new XHTMLTagAction());
255 //addAction("dl", new XHTMLTagAction());
256 addAction("li", new XHTMLTagItemAction());
258 addAction("strong", new XHTMLTagControlAction(STRONG
));
259 addAction("b", new XHTMLTagControlAction(BOLD
));
260 addAction("em", new XHTMLTagControlAction(EMPHASIS
));
261 addAction("i", new XHTMLTagControlAction(ITALIC
));
262 addAction("code", new XHTMLTagControlAction(CODE
));
263 addAction("tt", new XHTMLTagControlAction(CODE
));
264 addAction("kbd", new XHTMLTagControlAction(CODE
));
265 addAction("var", new XHTMLTagControlAction(CODE
));
266 addAction("samp", new XHTMLTagControlAction(CODE
));
267 addAction("cite", new XHTMLTagControlAction(CITE
));
268 addAction("sub", new XHTMLTagControlAction(SUB
));
269 addAction("sup", new XHTMLTagControlAction(SUP
));
270 addAction("dd", new XHTMLTagControlAction(DEFINITION_DESCRIPTION
));
271 addAction("dfn", new XHTMLTagControlAction(DEFINITION
));
272 addAction("strike", new XHTMLTagControlAction(STRIKETHROUGH
));
274 addAction("a", new XHTMLTagHyperlinkAction());
276 addAction("img", new XHTMLTagImageAction("src"));
277 addAction("object", new XHTMLTagImageAction("data"));
279 //addAction("area", new XHTMLTagAction());
280 //addAction("map", new XHTMLTagAction());
282 //addAction("base", new XHTMLTagAction());
283 //addAction("blockquote", new XHTMLTagAction());
284 addAction("br", new XHTMLTagRestartParagraphAction());
285 //addAction("center", new XHTMLTagAction());
286 addAction("div", new XHTMLTagParagraphAction());
287 //addAction("dt", new XHTMLTagAction());
288 //addAction("head", new XHTMLTagAction());
289 //addAction("hr", new XHTMLTagAction());
290 //addAction("link", new XHTMLTagAction());
291 //addAction("param", new XHTMLTagAction());
292 //addAction("q", new XHTMLTagAction());
293 //addAction("s", new XHTMLTagAction());
295 addAction("pre", new XHTMLTagPreAction());
296 //addAction("big", new XHTMLTagAction());
297 //addAction("small", new XHTMLTagAction());
298 //addAction("u", new XHTMLTagAction());
300 //addAction("table", new XHTMLTagAction());
301 addAction("td", new XHTMLTagParagraphAction());
302 addAction("th", new XHTMLTagParagraphAction());
303 //addAction("tr", new XHTMLTagAction());
304 //addAction("caption", new XHTMLTagAction());
305 //addAction("span", new XHTMLTagAction());
309 XHTMLReader::XHTMLReader(BookReader
&modelReader
) : myModelReader(modelReader
) {
312 bool XHTMLReader::readFile(const std::string
&pathPrefix
, const std::string
&fileName
, const std::string
&referenceName
) {
313 myModelReader
.addHyperlinkLabel(referenceName
);
317 myPathPrefix
= pathPrefix
;
318 myReferenceName
= referenceName
;
320 myPreformatted
= false;
322 return readDocument(pathPrefix
+ fileName
);
326 void XHTMLReader::startElementHandler(const char *tag
, const char **attributes
) {
327 static const std::string HASH
= "#";
328 const char *id
= attributeValue(attributes
, "id");
330 myModelReader
.addHyperlinkLabel(myReferenceName
+ HASH
+ id
);
333 XHTMLTagAction
*action
= ourTagActions
[ZLUnicodeUtil::toLower(tag
)];
335 action
->doAtStart(*this, attributes
);
339 void XHTMLReader::endElementHandler(const char *tag
) {
340 XHTMLTagAction
*action
= ourTagActions
[ZLUnicodeUtil::toLower(tag
)];
342 action
->doAtEnd(*this);
346 void XHTMLReader::characterDataHandler(const char *text
, int len
) {
347 if (myPreformatted
) {
348 if ((*text
== '\r') || (*text
== '\n')) {
349 myModelReader
.addControl(CODE
, false);
350 myModelReader
.endParagraph();
351 myModelReader
.beginParagraph();
352 myModelReader
.addControl(CODE
, true);
354 int spaceCounter
= 0;
355 while ((spaceCounter
< len
) && isspace((unsigned char)*text
)) {
358 myModelReader
.addFixedHSpace(spaceCounter
);
359 text
+= spaceCounter
;
363 myModelReader
.addData(std::string(text
, len
));
367 const std::vector
<std::string
> &XHTMLReader::externalDTDs() const {
368 return EntityFilesCollector::instance().externalDTDs("xhtml");