FBReader 0.8.15
[lbook_fbreader.git] / fbreader / src / formats / xhtml / XHTMLReader.cpp
blob3ad85af8c8f1d0bc75bb88fe44c7ff7344bd8d56
1 /*
2 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 #include <string.h>
22 #include <ZLFileImage.h>
23 #include <ZLUnicodeUtil.h>
25 #include "XHTMLReader.h"
26 #include "../util/EntityFilesCollector.h"
27 #include "../util/MiscUtil.h"
29 #include "../../bookmodel/BookReader.h"
30 #include "../../bookmodel/BookModel.h"
32 std::map<std::string,XHTMLTagAction*> XHTMLReader::ourTagActions;
34 XHTMLTagAction::~XHTMLTagAction() {
37 class XHTMLTagParagraphAction : public XHTMLTagAction {
39 public:
40 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
41 void doAtEnd(XHTMLReader &reader);
44 class XHTMLTagRestartParagraphAction : public XHTMLTagAction {
46 public:
47 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
48 void doAtEnd(XHTMLReader &reader);
51 class XHTMLTagImageAction : public XHTMLTagAction {
53 public:
54 XHTMLTagImageAction(const std::string &nameAttribute);
56 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
57 void doAtEnd(XHTMLReader &reader);
59 private:
60 const std::string myNameAttribute;
63 class XHTMLTagItemAction : public XHTMLTagAction {
65 public:
66 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
67 void doAtEnd(XHTMLReader &reader);
70 class XHTMLTagHyperlinkAction : public XHTMLTagAction {
72 public:
73 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
74 void doAtEnd(XHTMLReader &reader);
76 private:
77 std::stack<FBTextKind> myHyperlinkStack;
80 class XHTMLTagControlAction : public XHTMLTagAction {
82 public:
83 XHTMLTagControlAction(FBTextKind control);
85 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
86 void doAtEnd(XHTMLReader &reader);
88 private:
89 FBTextKind myControl;
92 class XHTMLTagParagraphWithControlAction : public XHTMLTagAction {
94 public:
95 XHTMLTagParagraphWithControlAction(FBTextKind control);
97 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
98 void doAtEnd(XHTMLReader &reader);
100 private:
101 FBTextKind myControl;
104 class XHTMLTagPreAction : public XHTMLTagAction {
106 public:
107 void doAtStart(XHTMLReader &reader, const char **xmlattributes);
108 void doAtEnd(XHTMLReader &reader);
111 void XHTMLTagParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
112 reader.myModelReader.beginParagraph();
115 void XHTMLTagParagraphAction::doAtEnd(XHTMLReader &reader) {
116 reader.myModelReader.endParagraph();
119 void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
120 reader.myModelReader.endParagraph();
121 reader.myModelReader.beginParagraph();
124 void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader&) {
127 void XHTMLTagItemAction::doAtStart(XHTMLReader &reader, const char**) {
128 reader.myModelReader.endParagraph();
129 // TODO: increase left indent
130 reader.myModelReader.beginParagraph();
131 // TODO: replace bullet sign by number inside OL tag
132 const std::string bullet = "\xE2\x80\xA2\xC0\xA0";
133 reader.myModelReader.addData(bullet);
136 void XHTMLTagItemAction::doAtEnd(XHTMLReader &reader) {
137 reader.myModelReader.endParagraph();
140 XHTMLTagImageAction::XHTMLTagImageAction(const std::string &nameAttribute) : myNameAttribute(nameAttribute) {
143 void XHTMLTagImageAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
144 const char *fileName = reader.attributeValue(xmlattributes, myNameAttribute.c_str());
145 if (fileName != 0) {
146 bool flag = reader.myModelReader.paragraphIsOpen();
147 if (flag) {
148 reader.myModelReader.endParagraph();
150 if ((strlen(fileName) > 2) && strncmp(fileName, "./", 2) == 0) {
151 fileName +=2;
153 const std::string fullfileName = reader.myPathPrefix + fileName;
154 reader.myModelReader.addImageReference(fullfileName);
155 reader.myModelReader.addImage(fullfileName, new ZLFileImage("image/auto", fullfileName, 0));
156 if (flag) {
157 reader.myModelReader.beginParagraph();
162 void XHTMLTagImageAction::doAtEnd(XHTMLReader&) {
165 XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control) : myControl(control) {
168 void XHTMLTagControlAction::doAtStart(XHTMLReader &reader, const char**) {
169 reader.myModelReader.pushKind(myControl);
170 reader.myModelReader.addControl(myControl, true);
173 void XHTMLTagControlAction::doAtEnd(XHTMLReader &reader) {
174 reader.myModelReader.addControl(myControl, false);
175 reader.myModelReader.popKind();
178 void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
179 const char *href = reader.attributeValue(xmlattributes, "href");
180 if (href != 0) {
181 const std::string link = (*href == '#') ? (reader.myReferenceName + href) : href;
182 FBTextKind hyperlinkType = MiscUtil::isReference(link) ? EXTERNAL_HYPERLINK : INTERNAL_HYPERLINK;
183 myHyperlinkStack.push(hyperlinkType);
184 reader.myModelReader.addHyperlinkControl(hyperlinkType, link);
185 } else {
186 myHyperlinkStack.push(REGULAR);
188 const char *name = reader.attributeValue(xmlattributes, "name");
189 if (name != 0) {
190 reader.myModelReader.addHyperlinkLabel(reader.myReferenceName + "#" + name);
194 void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader &reader) {
195 FBTextKind kind = myHyperlinkStack.top();
196 if (kind != REGULAR) {
197 reader.myModelReader.addControl(kind, false);
199 myHyperlinkStack.pop();
202 XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control) : myControl(control) {
205 void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader &reader, const char**) {
206 if ((myControl == TITLE) && (reader.myModelReader.model().bookTextModel()->paragraphsNumber() > 1)) {
207 reader.myModelReader.insertEndOfSectionParagraph();
209 reader.myModelReader.pushKind(myControl);
210 reader.myModelReader.beginParagraph();
213 void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader &reader) {
214 reader.myModelReader.endParagraph();
215 reader.myModelReader.popKind();
218 void XHTMLTagPreAction::doAtStart(XHTMLReader &reader, const char**) {
219 reader.myPreformatted = true;
220 reader.myModelReader.beginParagraph();
221 reader.myModelReader.addControl(CODE, true);
224 void XHTMLTagPreAction::doAtEnd(XHTMLReader &reader) {
225 reader.myModelReader.addControl(CODE, false);
226 reader.myModelReader.endParagraph();
227 reader.myPreformatted = false;
230 void XHTMLReader::addAction(const std::string &tag, XHTMLTagAction *action) {
231 ourTagActions.insert(std::pair<std::string,XHTMLTagAction*>(tag,action));
234 void XHTMLReader::fillTagTable() {
235 if (ourTagActions.empty()) {
236 //addAction("html", new XHTMLTagAction());
237 addAction("body", new XHTMLTagParagraphAction());
238 //addAction("title", new XHTMLTagAction());
239 //addAction("meta", new XHTMLTagAction());
240 //addAction("script", new XHTMLTagAction());
242 //addAction("font", new XHTMLTagAction());
243 //addAction("style", new XHTMLTagAction());
245 addAction("p", new XHTMLTagParagraphAction());
246 addAction("h1", new XHTMLTagParagraphWithControlAction(H1));
247 addAction("h2", new XHTMLTagParagraphWithControlAction(H2));
248 addAction("h3", new XHTMLTagParagraphWithControlAction(H3));
249 addAction("h4", new XHTMLTagParagraphWithControlAction(H4));
250 addAction("h5", new XHTMLTagParagraphWithControlAction(H5));
251 addAction("h6", new XHTMLTagParagraphWithControlAction(H6));
253 //addAction("ol", new XHTMLTagAction());
254 //addAction("ul", new XHTMLTagAction());
255 //addAction("dl", new XHTMLTagAction());
256 addAction("li", new XHTMLTagItemAction());
258 addAction("strong", new XHTMLTagControlAction(STRONG));
259 addAction("b", new XHTMLTagControlAction(BOLD));
260 addAction("em", new XHTMLTagControlAction(EMPHASIS));
261 addAction("i", new XHTMLTagControlAction(ITALIC));
262 addAction("code", new XHTMLTagControlAction(CODE));
263 addAction("tt", new XHTMLTagControlAction(CODE));
264 addAction("kbd", new XHTMLTagControlAction(CODE));
265 addAction("var", new XHTMLTagControlAction(CODE));
266 addAction("samp", new XHTMLTagControlAction(CODE));
267 addAction("cite", new XHTMLTagControlAction(CITE));
268 addAction("sub", new XHTMLTagControlAction(SUB));
269 addAction("sup", new XHTMLTagControlAction(SUP));
270 addAction("dd", new XHTMLTagControlAction(DEFINITION_DESCRIPTION));
271 addAction("dfn", new XHTMLTagControlAction(DEFINITION));
272 addAction("strike", new XHTMLTagControlAction(STRIKETHROUGH));
274 addAction("a", new XHTMLTagHyperlinkAction());
276 addAction("img", new XHTMLTagImageAction("src"));
277 addAction("object", new XHTMLTagImageAction("data"));
279 //addAction("area", new XHTMLTagAction());
280 //addAction("map", new XHTMLTagAction());
282 //addAction("base", new XHTMLTagAction());
283 //addAction("blockquote", new XHTMLTagAction());
284 addAction("br", new XHTMLTagRestartParagraphAction());
285 //addAction("center", new XHTMLTagAction());
286 addAction("div", new XHTMLTagParagraphAction());
287 //addAction("dt", new XHTMLTagAction());
288 //addAction("head", new XHTMLTagAction());
289 //addAction("hr", new XHTMLTagAction());
290 //addAction("link", new XHTMLTagAction());
291 //addAction("param", new XHTMLTagAction());
292 //addAction("q", new XHTMLTagAction());
293 //addAction("s", new XHTMLTagAction());
295 addAction("pre", new XHTMLTagPreAction());
296 //addAction("big", new XHTMLTagAction());
297 //addAction("small", new XHTMLTagAction());
298 //addAction("u", new XHTMLTagAction());
300 //addAction("table", new XHTMLTagAction());
301 addAction("td", new XHTMLTagParagraphAction());
302 addAction("th", new XHTMLTagParagraphAction());
303 //addAction("tr", new XHTMLTagAction());
304 //addAction("caption", new XHTMLTagAction());
305 //addAction("span", new XHTMLTagAction());
309 XHTMLReader::XHTMLReader(BookReader &modelReader) : myModelReader(modelReader) {
312 bool XHTMLReader::readFile(const std::string &pathPrefix, const std::string &fileName, const std::string &referenceName) {
313 myModelReader.addHyperlinkLabel(referenceName);
315 fillTagTable();
317 myPathPrefix = pathPrefix;
318 myReferenceName = referenceName;
320 myPreformatted = false;
322 return readDocument(pathPrefix + fileName);
326 void XHTMLReader::startElementHandler(const char *tag, const char **attributes) {
327 static const std::string HASH = "#";
328 const char *id = attributeValue(attributes, "id");
329 if (id != 0) {
330 myModelReader.addHyperlinkLabel(myReferenceName + HASH + id);
333 XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
334 if (action != 0) {
335 action->doAtStart(*this, attributes);
339 void XHTMLReader::endElementHandler(const char *tag) {
340 XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
341 if (action != 0) {
342 action->doAtEnd(*this);
346 void XHTMLReader::characterDataHandler(const char *text, int len) {
347 if (myPreformatted) {
348 if ((*text == '\r') || (*text == '\n')) {
349 myModelReader.addControl(CODE, false);
350 myModelReader.endParagraph();
351 myModelReader.beginParagraph();
352 myModelReader.addControl(CODE, true);
354 int spaceCounter = 0;
355 while ((spaceCounter < len) && isspace((unsigned char)*text)) {
356 ++spaceCounter;
358 myModelReader.addFixedHSpace(spaceCounter);
359 text += spaceCounter;
360 len -= spaceCounter;
362 if (len > 0) {
363 myModelReader.addData(std::string(text, len));
367 const std::vector<std::string> &XHTMLReader::externalDTDs() const {
368 return EntityFilesCollector::instance().externalDTDs("xhtml");