gui
[lbook_fbreader.git] / fbreader / src / formats / pdb / MobipocketHtmlBookReader.cpp
blob91dcd2d80fc76208d69ca250776bcd80d81920f9
/*
 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
20 #include <stdlib.h>
21 #include <string.h>
23 #include <algorithm>
25 #include <ZLFile.h>
26 #include <ZLFileImage.h>
27 #include <ZLStringUtil.h>
28 #include <ZLUnicodeUtil.h>
30 #include "MobipocketHtmlBookReader.h"
31 #include "MobipocketStream.h"
32 #include "../html/HtmlTagActions.h"
33 #include "../../bookmodel/BookModel.h"
35 class MobipocketHtmlImageTagAction : public HtmlTagAction {
37 public:
38 MobipocketHtmlImageTagAction(HtmlBookReader &reader);
39 void run(const HtmlReader::HtmlTag &tag);
42 class MobipocketHtmlHrTagAction : public HtmlTagAction {
44 public:
45 MobipocketHtmlHrTagAction(HtmlBookReader &reader);
46 void run(const HtmlReader::HtmlTag &tag);
49 class MobipocketHtmlHrefTagAction : public HtmlHrefTagAction {
51 public:
52 MobipocketHtmlHrefTagAction(HtmlBookReader &reader);
53 void run(const HtmlReader::HtmlTag &tag);
56 class MobipocketHtmlGuideTagAction : public HtmlTagAction {
58 public:
59 MobipocketHtmlGuideTagAction(HtmlBookReader &reader);
60 void run(const HtmlReader::HtmlTag &tag);
63 class MobipocketHtmlReferenceTagAction : public HtmlTagAction {
65 public:
66 MobipocketHtmlReferenceTagAction(HtmlBookReader &reader);
67 void run(const HtmlReader::HtmlTag &tag);
70 MobipocketHtmlImageTagAction::MobipocketHtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
73 void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
74 if (tag.Start) {
75 for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
76 if (tag.Attributes[i].Name == "RECINDEX") {
77 int index = atoi(tag.Attributes[i].Value.c_str());
78 if (index > 0) {
79 int &imageCounter = ((MobipocketHtmlBookReader&)myReader).myImageCounter;
80 imageCounter = std::max(imageCounter, index);
81 bool stopParagraph = bookReader().paragraphIsOpen();
82 if (stopParagraph) {
83 bookReader().endParagraph();
85 std::string id;
86 ZLStringUtil::appendNumber(id, index);
87 bookReader().addImageReference(id);
88 if (stopParagraph) {
89 bookReader().beginParagraph();
92 break;
98 MobipocketHtmlHrTagAction::MobipocketHtmlHrTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
101 void MobipocketHtmlHrTagAction::run(const HtmlReader::HtmlTag &tag) {
102 if (tag.Start) {
103 if (bookReader().contentsParagraphIsOpen()) {
104 bookReader().endContentsParagraph();
105 bookReader().exitTitle();
107 bookReader().insertEndOfSectionParagraph();
111 MobipocketHtmlHrefTagAction::MobipocketHtmlHrefTagAction(HtmlBookReader &reader) : HtmlHrefTagAction(reader) {
114 MobipocketHtmlBookReader::TOCReader::TOCReader(MobipocketHtmlBookReader &reader) : myReader(reader) {
115 reset();
118 void MobipocketHtmlBookReader::TOCReader::reset() {
119 myEntries.clear();
121 myIsActive = false;
122 myStartOffset = (size_t)-1;
123 myEndOffset = (size_t)-1;
124 myCurrentEntryText.erase();
127 bool MobipocketHtmlBookReader::TOCReader::rangeContainsPosition(size_t position) {
128 return (myStartOffset <= position) && (myEndOffset > position);
131 void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
132 myCurrentReference = position;
133 myIsActive = true;
136 void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
137 if (myIsActive && !myCurrentEntryText.empty()) {
138 std::string converted;
139 myReader.myConverter->convert(converted, myCurrentEntryText);
140 myReader.myConverter->reset();
141 myEntries[myCurrentReference] = converted;
142 myCurrentEntryText.erase();
144 myIsActive = false;
147 void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t len) {
148 if (myIsActive) {
149 myCurrentEntryText.append(text, len);
153 void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) {
154 myEntries[position] = text;
155 if (rangeContainsPosition(position)) {
156 setEndOffset(position);
160 void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
161 myStartOffset = position;
162 std::map<size_t,std::string>::const_iterator it = myEntries.lower_bound(position);
163 if (it != myEntries.end()) {
164 ++it;
165 if (it != myEntries.end()) {
166 myEndOffset = it->first;
171 void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
172 myEndOffset = position;
175 const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
176 return myEntries;
179 void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
180 MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
181 if (tag.Start) {
182 for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
183 if (tag.Attributes[i].Name == "FILEPOS") {
184 const std::string &value = tag.Attributes[i].Value;
185 if (!value.empty()) {
186 std::string label = "&";
187 int intValue = atoi(value.c_str());
188 if (intValue > 0) {
189 if (reader.myTocReader.rangeContainsPosition(tag.Offset)) {
190 reader.myTocReader.startReadEntry(intValue);
191 if (reader.myTocReader.rangeContainsPosition(intValue)) {
192 reader.myTocReader.setEndOffset(intValue);
195 reader.myFileposReferences.insert(intValue);
196 ZLStringUtil::appendNumber(label, intValue);
197 setHyperlinkType(INTERNAL_HYPERLINK);
198 bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, label);
199 return;
204 } else {
205 reader.myTocReader.endReadEntry();
207 HtmlHrefTagAction::run(tag);
210 MobipocketHtmlGuideTagAction::MobipocketHtmlGuideTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
213 void MobipocketHtmlGuideTagAction::run(const HtmlReader::HtmlTag &tag) {
214 MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
215 reader.myInsideGuide = tag.Start;
218 MobipocketHtmlReferenceTagAction::MobipocketHtmlReferenceTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
221 void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
222 MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
223 if (reader.myInsideGuide) {
224 std::string title;
225 std::string filepos;
226 bool isTocReference = false;
227 for (size_t i = 0; i < tag.Attributes.size(); ++i) {
228 const std::string &name = tag.Attributes[i].Name;
229 const std::string &value = tag.Attributes[i].Value;
230 if (name == "TITLE") {
231 title = value;
232 } else if (name == "FILEPOS") {
233 filepos = value;
234 } else if ((name == "TYPE") && (ZLUnicodeUtil::toUpper(value) == "TOC")) {
235 isTocReference = true;
238 if (!title.empty() && !filepos.empty()) {
239 int position = atoi(filepos.c_str());
240 if (position > 0) {
241 reader.myTocReader.addReference(position, title);
242 if (isTocReference) {
243 reader.myTocReader.setStartOffset(position);
250 shared_ptr<HtmlTagAction> MobipocketHtmlBookReader::createAction(const std::string &tag) {
251 if (tag == "IMG") {
252 return new MobipocketHtmlImageTagAction(*this);
253 } else if (tag == "HR") {
254 return new MobipocketHtmlHrTagAction(*this);
255 } else if (tag == "A") {
256 return new MobipocketHtmlHrefTagAction(*this);
257 } else if (tag == "GUIDE") {
258 return new MobipocketHtmlGuideTagAction(*this);
259 } else if (tag == "REFERENCE") {
260 return new MobipocketHtmlReferenceTagAction(*this);
262 return HtmlBookReader::createAction(tag);
265 void MobipocketHtmlBookReader::startDocumentHandler() {
266 HtmlBookReader::startDocumentHandler();
267 myImageCounter = 0;
268 myInsideGuide = false;
269 myFileposReferences.clear();
270 myPositionToParagraphMap.clear();
271 myTocReader.reset();
274 bool MobipocketHtmlBookReader::tagHandler(const HtmlTag &tag) {
275 size_t paragraphNumber = myBookReader.model().bookTextModel()->paragraphsNumber();
276 if (myBookReader.paragraphIsOpen()) {
277 --paragraphNumber;
279 myPositionToParagraphMap.push_back(std::pair<size_t,size_t>(tag.Offset, paragraphNumber));
280 return HtmlBookReader::tagHandler(tag);
283 MobipocketHtmlBookReader::MobipocketHtmlBookReader(const std::string &fileName, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlBookReader("", model, format, encoding), myFileName(fileName), myTocReader(*this) {
284 setBuildTableOfContent(false);
285 setProcessPreTag(false);
288 bool MobipocketHtmlBookReader::characterDataHandler(const char *text, int len, bool convert) {
289 myTocReader.appendText(text, len);
290 return HtmlBookReader::characterDataHandler(text, len, convert);
293 void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
294 HtmlBookReader::readDocument(stream);
296 shared_ptr<ZLInputStream> fileStream = ZLFile(myFileName).inputStream();
297 bool found = false;
298 int index = 0;
299 if (!fileStream.isNull() && fileStream->open()) {
300 char bu[10];
301 std::pair<int,int> firstImageLocation = ((MobipocketStream&)stream).imageLocation(0);
302 fileStream->seek(firstImageLocation.first, false);
303 while ((firstImageLocation.first > 0) && (firstImageLocation.second > 0)) {
304 if (firstImageLocation.second > 10) {
305 fileStream->read(bu, 10);
306 if ((strncmp(bu, "BM", 2) == 0) ||
307 (strncmp(bu, "GIF8", 4) == 0) ||
308 (strncmp(bu + 6, "JFIF", 4) == 0)) {
309 found = true;
310 break;
312 fileStream->seek(firstImageLocation.second - 10, false);
313 } else {
314 fileStream->seek(firstImageLocation.second, false);
316 index++;
317 firstImageLocation = ((MobipocketStream&)stream).imageLocation(index);
319 fileStream->close();
322 if (found) {
323 for (int i = 0; i < myImageCounter; i++) {
324 std::pair<int,int> imageLocation = ((MobipocketStream&)stream).imageLocation(i + index);
325 if ((imageLocation.first > 0) && (imageLocation.second > 0)) {
326 std::string id;
327 ZLStringUtil::appendNumber(id, i + 1);
328 myBookReader.addImage(id, new ZLFileImage("image/auto", myFileName, imageLocation.first, imageLocation.second));
333 std::vector<std::pair<size_t,size_t> >::const_iterator jt = myPositionToParagraphMap.begin();
334 for (std::set<size_t>::const_iterator it = myFileposReferences.begin(); it != myFileposReferences.end(); ++it) {
335 while (jt != myPositionToParagraphMap.end() && jt->first < *it) {
336 ++jt;
338 if (jt == myPositionToParagraphMap.end()) {
339 break;
341 std::string label = "&";
342 ZLStringUtil::appendNumber(label, *it);
343 myBookReader.addHyperlinkLabel(label, jt->second);
346 jt = myPositionToParagraphMap.begin();
347 const std::map<size_t,std::string> &entries = myTocReader.entries();
348 for (std::map<size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
349 while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
350 ++jt;
352 if (jt == myPositionToParagraphMap.end()) {
353 break;
355 myBookReader.beginContentsParagraph(jt->second);
356 myBookReader.addContentsData(it->second);
357 myBookReader.endContentsParagraph();