/*
 * Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
26 #include <ZLFileImage.h>
27 #include <ZLStringUtil.h>
28 #include <ZLUnicodeUtil.h>
30 #include "MobipocketHtmlBookReader.h"
31 #include "MobipocketStream.h"
32 #include "../html/HtmlTagActions.h"
33 #include "../../bookmodel/BookModel.h"
35 class MobipocketHtmlImageTagAction
: public HtmlTagAction
{
38 MobipocketHtmlImageTagAction(HtmlBookReader
&reader
);
39 void run(const HtmlReader::HtmlTag
&tag
);
42 class MobipocketHtmlHrTagAction
: public HtmlTagAction
{
45 MobipocketHtmlHrTagAction(HtmlBookReader
&reader
);
46 void run(const HtmlReader::HtmlTag
&tag
);
49 class MobipocketHtmlHrefTagAction
: public HtmlHrefTagAction
{
52 MobipocketHtmlHrefTagAction(HtmlBookReader
&reader
);
53 void run(const HtmlReader::HtmlTag
&tag
);
56 class MobipocketHtmlGuideTagAction
: public HtmlTagAction
{
59 MobipocketHtmlGuideTagAction(HtmlBookReader
&reader
);
60 void run(const HtmlReader::HtmlTag
&tag
);
63 class MobipocketHtmlReferenceTagAction
: public HtmlTagAction
{
66 MobipocketHtmlReferenceTagAction(HtmlBookReader
&reader
);
67 void run(const HtmlReader::HtmlTag
&tag
);
70 MobipocketHtmlImageTagAction::MobipocketHtmlImageTagAction(HtmlBookReader
&reader
) : HtmlTagAction(reader
) {
// Handles one image tag: scans the attributes for "RECINDEX", parses its
// value with atoi(), raises the reader's myImageCounter to at least that
// index, and adds an image reference for `id` to the book reader.
//
// NOTE(review): the listing numbers embedded in this extract skip 74, 78,
// 82, 84-85, 88 and 90-96, so several source lines (conditions, the
// declaration of `id`, closing braces) are not visible here. The comments
// below describe only the visible statements — confirm against the
// upstream file before changing the logic.
73 void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag
&tag
) {
75 for (unsigned int i
= 0; i
< tag
.Attributes
.size(); ++i
) {
76 if (tag
.Attributes
[i
].Name
== "RECINDEX") {
77 int index
= atoi(tag
.Attributes
[i
].Value
.c_str());
// Track the largest record index seen so far on the owning reader
// (myReader is downcast to MobipocketHtmlBookReader).
79 int &imageCounter
= ((MobipocketHtmlBookReader
&)myReader
).myImageCounter
;
80 imageCounter
= std::max(imageCounter
, index
);
// Remember whether a paragraph was open before the image is inserted.
// NOTE(review): presumably the endParagraph()/beginParagraph() calls below
// are guarded by this flag on the missing lines 82 and 88 — confirm.
81 bool stopParagraph
= bookReader().paragraphIsOpen();
83 bookReader().endParagraph();
// `id` is declared on a line missing from this extract; the index is
// appended to it and the result is used as the image reference id.
86 ZLStringUtil::appendNumber(id
, index
);
87 bookReader().addImageReference(id
);
89 bookReader().beginParagraph();
98 MobipocketHtmlHrTagAction::MobipocketHtmlHrTagAction(HtmlBookReader
&reader
) : HtmlTagAction(reader
) {
// Handles an HR tag: if a contents paragraph is open it is ended and title
// mode is exited, then an end-of-section paragraph is inserted.
//
// NOTE(review): listing line 102 is missing from this extract (the embedded
// numbers jump from 101 to 103) and two closing lines follow 107, so the
// visible statements are probably wrapped in a condition that is not shown
// here — confirm against the upstream file.
101 void MobipocketHtmlHrTagAction::run(const HtmlReader::HtmlTag
&tag
) {
103 if (bookReader().contentsParagraphIsOpen()) {
104 bookReader().endContentsParagraph();
105 bookReader().exitTitle();
107 bookReader().insertEndOfSectionParagraph();
111 MobipocketHtmlHrefTagAction::MobipocketHtmlHrefTagAction(HtmlBookReader
&reader
) : HtmlHrefTagAction(reader
) {
// Constructs the TOC reader bound to its owning MobipocketHtmlBookReader
// (stored by reference in myReader).
//
// NOTE(review): the embedded listing numbers jump from 114 to 118, so the
// constructor body (line 115) is not visible in this extract — confirm
// against the upstream file whether it performs further initialisation.
114 MobipocketHtmlBookReader::TOCReader::TOCReader(MobipocketHtmlBookReader
&reader
) : myReader(reader
) {
// Resets the TOC reader state: both range offsets are set to (size_t)-1
// (with these values rangeContainsPosition() is false for every position)
// and the pending entry text is cleared.
//
// NOTE(review): listing lines 119-121 are missing from this extract, so
// additional members may be reset there — confirm against the upstream file.
118 void MobipocketHtmlBookReader::TOCReader::reset() {
122 myStartOffset
= (size_t)-1;
123 myEndOffset
= (size_t)-1;
124 myCurrentEntryText
.erase();
127 bool MobipocketHtmlBookReader::TOCReader::rangeContainsPosition(size_t position
) {
128 return (myStartOffset
<= position
) && (myEndOffset
> position
);
// Begins reading a TOC entry: remembers `position` as the key under which
// endReadEntry() will store the collected text.
//
// NOTE(review): listing line 133 is missing from this extract (numbers jump
// from 132 to 136); endReadEntry() tests myIsActive, so that flag is
// presumably set here — confirm against the upstream file.
131 void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position
) {
132 myCurrentReference
= position
;
// Finishes the current TOC entry. When the reader is active and some text
// was collected, the text is run through the owning reader's converter,
// stored in myEntries under myCurrentReference, and the buffer is cleared.
//
// NOTE(review): listing lines 143-145 are missing from this extract; that
// is more lines than the closing braces need, so a statement (presumably
// clearing myIsActive) is likely missing — confirm against upstream.
136 void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
137 if (myIsActive
&& !myCurrentEntryText
.empty()) {
138 std::string converted
;
139 myReader
.myConverter
->convert(converted
, myCurrentEntryText
);
// The converter is reset right after use, before the next entry is read.
140 myReader
.myConverter
->reset();
141 myEntries
[myCurrentReference
] = converted
;
142 myCurrentEntryText
.erase();
// Appends `len` bytes starting at `text` to the text of the TOC entry that
// is currently being collected.
//
// NOTE(review): listing line 148 is missing from this extract and two lines
// follow 149 before the next function, so the append is probably guarded by
// a condition that is not visible here — confirm against upstream.
147 void MobipocketHtmlBookReader::TOCReader::appendText(const char *text
, size_t len
) {
149 myCurrentEntryText
.append(text
, len
);
153 void MobipocketHtmlBookReader::TOCReader::addReference(size_t position
, const std::string
&text
) {
154 myEntries
[position
] = text
;
155 if (rangeContainsPosition(position
)) {
156 setEndOffset(position
);
// Sets the start of the TOC range to `position` and derives the end of the
// range from the entries already known: it looks up the first entry whose
// key is not less than `position` and assigns an entry key to myEndOffset.
//
// NOTE(review): listing line 164 is missing from this extract. It sits
// between two identical `it != myEntries.end()` tests, so the iterator is
// presumably advanced there (making the end offset the key of the entry
// after the lower bound) — confirm against the upstream file.
160 void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position
) {
161 myStartOffset
= position
;
162 std::map
<size_t,std::string
>::const_iterator it
= myEntries
.lower_bound(position
);
163 if (it
!= myEntries
.end()) {
165 if (it
!= myEntries
.end()) {
166 myEndOffset
= it
->first
;
171 void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position
) {
172 myEndOffset
= position
;
// Read-only accessor for the collected TOC entries, as a map from size_t
// key to entry text.
//
// NOTE(review): the body (listing line 176) is missing from this extract;
// presumably it returns myEntries — confirm against the upstream file.
175 const std::map
<size_t,std::string
> &MobipocketHtmlBookReader::TOCReader::entries() const {
// Handles an anchor tag. For a non-empty "FILEPOS" attribute the value is
// parsed with atoi(), recorded in the reader's myFileposReferences, and an
// INTERNAL_HYPERLINK control labelled "&<value>" is added. The TOC reader is
// told to start/end entries, and the generic HtmlHrefTagAction::run() is
// also invoked.
//
// NOTE(review): listing lines 181, 188, 193-194, 199-204, 206 and 208 are
// missing from this extract, so the exact nesting of the visible statements
// (and any early return / else branch) cannot be determined from here —
// confirm against the upstream file before changing the logic.
179 void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag
&tag
) {
180 MobipocketHtmlBookReader
&reader
= (MobipocketHtmlBookReader
&)myReader
;
182 for (unsigned int i
= 0; i
< tag
.Attributes
.size(); ++i
) {
183 if (tag
.Attributes
[i
].Name
== "FILEPOS") {
184 const std::string
&value
= tag
.Attributes
[i
].Value
;
185 if (!value
.empty()) {
// Hyperlink labels for file positions are "&" followed by the number; the
// number itself is appended further down.
186 std::string label
= "&";
187 int intValue
= atoi(value
.c_str());
// A link whose own offset lies inside the TOC range starts a TOC entry
// keyed by its target position.
189 if (reader
.myTocReader
.rangeContainsPosition(tag
.Offset
)) {
190 reader
.myTocReader
.startReadEntry(intValue
);
// If the target itself lies inside the TOC range, the range is cut short
// at the target.
191 if (reader
.myTocReader
.rangeContainsPosition(intValue
)) {
192 reader
.myTocReader
.setEndOffset(intValue
);
// Remember the target position; readDocument() later adds a hyperlink
// label for every recorded position.
195 reader
.myFileposReferences
.insert(intValue
);
196 ZLStringUtil::appendNumber(label
, intValue
);
197 setHyperlinkType(INTERNAL_HYPERLINK
);
198 bookReader().addHyperlinkControl(INTERNAL_HYPERLINK
, label
);
205 reader
.myTocReader
.endReadEntry();
207 HtmlHrefTagAction::run(tag
);
210 MobipocketHtmlGuideTagAction::MobipocketHtmlGuideTagAction(HtmlBookReader
&reader
) : HtmlTagAction(reader
) {
213 void MobipocketHtmlGuideTagAction::run(const HtmlReader::HtmlTag
&tag
) {
214 MobipocketHtmlBookReader
&reader
= (MobipocketHtmlBookReader
&)myReader
;
215 reader
.myInsideGuide
= tag
.Start
;
218 MobipocketHtmlReferenceTagAction::MobipocketHtmlReferenceTagAction(HtmlBookReader
&reader
) : HtmlTagAction(reader
) {
// Handles a REFERENCE tag, but only while the reader's myInsideGuide flag is
// set. It walks the attributes looking at "TITLE", "FILEPOS" and "TYPE"
// (a TYPE whose upper-cased value is "TOC" marks the table of contents).
// When both `title` and `filepos` are non-empty, the reference is added to
// the TOC reader at the parsed position, and a TOC reference additionally
// sets the TOC reader's start offset.
//
// NOTE(review): listing lines 224-225, 231, 233, 236-237, 240 and 244-248
// are missing from this extract. The declarations of `title` and `filepos`
// and the bodies of the "TITLE"/"FILEPOS" branches (presumably assigning
// `value` to them) are therefore not visible — confirm against upstream.
221 void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag
&tag
) {
222 MobipocketHtmlBookReader
&reader
= (MobipocketHtmlBookReader
&)myReader
;
223 if (reader
.myInsideGuide
) {
226 bool isTocReference
= false;
227 for (size_t i
= 0; i
< tag
.Attributes
.size(); ++i
) {
228 const std::string
&name
= tag
.Attributes
[i
].Name
;
229 const std::string
&value
= tag
.Attributes
[i
].Value
;
230 if (name
== "TITLE") {
232 } else if (name
== "FILEPOS") {
// The TYPE value is compared case-insensitively via toUpper().
234 } else if ((name
== "TYPE") && (ZLUnicodeUtil::toUpper(value
) == "TOC")) {
235 isTocReference
= true;
238 if (!title
.empty() && !filepos
.empty()) {
239 int position
= atoi(filepos
.c_str());
241 reader
.myTocReader
.addReference(position
, title
);
242 if (isTocReference
) {
243 reader
.myTocReader
.setStartOffset(position
);
// Factory for tag actions. Returns a Mobipocket-specific action for the tags
// this reader handles itself ("HR", "A", "GUIDE", "REFERENCE" and the image
// tag) and defers to HtmlBookReader::createAction() for everything else.
// The raw pointer from `new` is converted to the returned shared_ptr.
//
// NOTE(review): listing line 251 — the first condition, which selects the
// image action — is missing from this extract; confirm the tag name against
// the upstream file.
250 shared_ptr
<HtmlTagAction
> MobipocketHtmlBookReader::createAction(const std::string
&tag
) {
252 return new MobipocketHtmlImageTagAction(*this);
253 } else if (tag
== "HR") {
254 return new MobipocketHtmlHrTagAction(*this);
255 } else if (tag
== "A") {
256 return new MobipocketHtmlHrefTagAction(*this);
257 } else if (tag
== "GUIDE") {
258 return new MobipocketHtmlGuideTagAction(*this);
259 } else if (tag
== "REFERENCE") {
260 return new MobipocketHtmlReferenceTagAction(*this);
262 return HtmlBookReader::createAction(tag
);
// Called at the start of a document: runs the base-class handler, then
// clears this reader's per-document state (guide flag, collected FILEPOS
// targets, and the offset-to-paragraph map).
//
// NOTE(review): listing lines 267 and 271 are missing from this extract, so
// further state may be reset there (the image counter and the TOC reader
// are not reset in the visible lines) — confirm against upstream.
265 void MobipocketHtmlBookReader::startDocumentHandler() {
266 HtmlBookReader::startDocumentHandler();
268 myInsideGuide
= false;
269 myFileposReferences
.clear();
270 myPositionToParagraphMap
.clear();
// Called for every tag: records the pair (tag offset, paragraph number) in
// myPositionToParagraphMap, then delegates to HtmlBookReader::tagHandler()
// and returns its result. readDocument() later uses this map to translate
// file positions into paragraph numbers.
//
// NOTE(review): listing lines 277-278 are missing from this extract; the
// body of the paragraphIsOpen() branch (presumably an adjustment of
// paragraphNumber) is not visible — confirm against the upstream file.
274 bool MobipocketHtmlBookReader::tagHandler(const HtmlTag
&tag
) {
// Number of paragraphs currently in the book text model.
275 size_t paragraphNumber
= myBookReader
.model().bookTextModel()->paragraphsNumber();
276 if (myBookReader
.paragraphIsOpen()) {
279 myPositionToParagraphMap
.push_back(std::pair
<size_t,size_t>(tag
.Offset
, paragraphNumber
));
280 return HtmlBookReader::tagHandler(tag
);
283 MobipocketHtmlBookReader::MobipocketHtmlBookReader(const std::string
&fileName
, BookModel
&model
, const PlainTextFormat
&format
, const std::string
&encoding
) : HtmlBookReader("", model
, format
, encoding
), myFileName(fileName
), myTocReader(*this) {
284 setBuildTableOfContent(false);
285 setProcessPreTag(false);
288 bool MobipocketHtmlBookReader::characterDataHandler(const char *text
, int len
, bool convert
) {
289 myTocReader
.appendText(text
, len
);
290 return HtmlBookReader::characterDataHandler(text
, len
, convert
);
293 void MobipocketHtmlBookReader::readDocument(ZLInputStream
&stream
) {
294 HtmlBookReader::readDocument(stream
);
296 shared_ptr
<ZLInputStream
> fileStream
= ZLFile(myFileName
).inputStream();
299 if (!fileStream
.isNull() && fileStream
->open()) {
301 std::pair
<int,int> firstImageLocation
= ((MobipocketStream
&)stream
).imageLocation(0);
302 fileStream
->seek(firstImageLocation
.first
, false);
303 while ((firstImageLocation
.first
> 0) && (firstImageLocation
.second
> 0)) {
304 if (firstImageLocation
.second
> 10) {
305 fileStream
->read(bu
, 10);
306 if ((strncmp(bu
, "BM", 2) == 0) ||
307 (strncmp(bu
, "GIF8", 4) == 0) ||
308 (strncmp(bu
+ 6, "JFIF", 4) == 0)) {
312 fileStream
->seek(firstImageLocation
.second
- 10, false);
314 fileStream
->seek(firstImageLocation
.second
, false);
317 firstImageLocation
= ((MobipocketStream
&)stream
).imageLocation(index
);
323 for (int i
= 0; i
< myImageCounter
; i
++) {
324 std::pair
<int,int> imageLocation
= ((MobipocketStream
&)stream
).imageLocation(i
+ index
);
325 if ((imageLocation
.first
> 0) && (imageLocation
.second
> 0)) {
327 ZLStringUtil::appendNumber(id
, i
+ 1);
328 myBookReader
.addImage(id
, new ZLFileImage("image/auto", myFileName
, imageLocation
.first
, imageLocation
.second
));
333 std::vector
<std::pair
<size_t,size_t> >::const_iterator jt
= myPositionToParagraphMap
.begin();
334 for (std::set
<size_t>::const_iterator it
= myFileposReferences
.begin(); it
!= myFileposReferences
.end(); ++it
) {
335 while (jt
!= myPositionToParagraphMap
.end() && jt
->first
< *it
) {
338 if (jt
== myPositionToParagraphMap
.end()) {
341 std::string label
= "&";
342 ZLStringUtil::appendNumber(label
, *it
);
343 myBookReader
.addHyperlinkLabel(label
, jt
->second
);
346 jt
= myPositionToParagraphMap
.begin();
347 const std::map
<size_t,std::string
> &entries
= myTocReader
.entries();
348 for (std::map
<size_t,std::string
>::const_iterator it
= entries
.begin(); it
!= entries
.end(); ++it
) {
349 while (jt
!= myPositionToParagraphMap
.end() && jt
->first
< it
->first
) {
352 if (jt
== myPositionToParagraphMap
.end()) {
355 myBookReader
.beginContentsParagraph(jt
->second
);
356 myBookReader
.addContentsData(it
->second
);
357 myBookReader
.endContentsParagraph();