Fix tg_termpos1 for 64-bit termpos
[xapian.git] / xapian-applications / omega / handler_libcdr.cc
blobd507380deecfb35cff4f3146566dcc719c41d687
1 /** @file
2 * @brief Extract text using libcdr.
3 */
4 /* Copyright (C) 2020 Parth Kapadia
5 * Copyright (C) 2022,2023 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
22 #include <config.h>
23 #include "handler.h"
25 #include <librevenge-generators/librevenge-generators.h>
26 #include <librevenge-stream/librevenge-stream.h>
27 #include <libcdr/libcdr.h>
29 using namespace librevenge;
30 using namespace std;
/** Handler start-up hook.
 *
 *  No set-up work is performed here.
 *
 *  @return Always true.
 */
bool
initialise()
{
    constexpr bool ready = true;
    return ready;
}
38 void
39 extract(const string& filename, const string&)
41 RVNGFileStream input(filename.c_str());
42 RVNGStringVector pages;
43 RVNGTextDrawingGenerator content(pages);
45 // There's also support in libcdr for CMX files, which is an exchange
46 // format used by CorelDraw which seems to be mostly used for brushes
47 // and clip art, neither of which are likely to contain extractable
48 // text, so currently we don't attempt to handle CMX files here.
50 // There don't seem to be many freely available sample files either - the
51 // only one I found easily was NEWSFLASH.CMX in the EDRM dataset, which
52 // doesn't contain any extractable text.
54 // check if cdr file supported
55 if (!libcdr::CDRDocument::isSupported(&input)) {
56 send_field(FIELD_ERROR, "Format not supported");
57 return;
60 if (!libcdr::CDRDocument::parse(&input, &content)) {
61 send_field(FIELD_ERROR, "Failed to parse file");
62 return;
65 int page_count = pages.size();
66 send_field_page_count(page_count);
67 for (auto i = 0; i < page_count; ++i) {
68 const RVNGString& page = pages[i];
69 send_field(FIELD_BODY, page.cstr(), page.size());