2 * @brief Parser for OpenDocument's meta.xml.
4 * Also used for MSXML's docProps/core.xml.
6 /* Copyright (C) 2006,2009,2010,2011,2013,2015,2020,2022 Olly Betts
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "opendocmetaparser.h"
// Handles one piece of text (`content`) and files it under one of the
// metadata members seen below (keywords, title, sample, author, created).
// NOTE(review): this excerpt is missing lines - the return type, the braces
// and whatever dispatch selects one of the cases below are not visible - so
// the comments here only describe the statements that can actually be seen.
33 OpenDocMetaParser::process_content(const string
& content
)
// For each of the four text members below, a single space is appended when
// the member already holds text, which keeps successive pieces separated.
// Presumably `content` itself is appended straight afterwards - that
// statement is not visible in this excerpt, so confirm against the full file.
37 if (!keywords
.empty()) keywords
+= ' ';
41 if (!title
.empty()) title
+= ' ';
45 if (!sample
.empty()) sample
+= ' ';
49 if (!author
.empty()) author
+= ' ';
// Unlike the text members above, `created` is overwritten rather than
// appended to: it takes whatever parse_datetime() returns for this content.
53 // E.g. 2013-03-04T22:57:00
54 created
= parse_datetime(content
);
58 // Ignore other fields.
// Inspects the name of a tag (`tag`) and recognises the metadata elements
// this parser cares about; presumably invoked for each opening tag - the
// caller is not visible here.
// NOTE(review): this excerpt is missing lines - the return type, the braces,
// the statements executed inside most branches and the declarations of
// `value` and `u_pages` are not visible - so only the visible tests are
// described below.
64 OpenDocMetaParser::opening_tag(const string
& tag
)
// "dc:title" (8 characters) is the shortest name compared against below, so
// a shorter tag can never match and true is returned straight away.  This
// guard also makes the tag[0] / tag[1] accesses below safe.
66 if (tag
.size() < 8) return true;
// Check the first two characters before doing any full string comparison.
// Note that "dcterms:created" also starts with "dc", which is why it is
// tested inside this group alongside the "dc:" names.
67 if (tag
[0] == 'd' && tag
[1] == 'c') {
68 if (tag
== "dc:subject") {
69 // OpenDocument, MSXML.
71 // dc:subject is "Subject and Keywords":
72 // "Typically, Subject will be expressed as keywords, key phrases
73 // or classification codes that describe a topic of the resource."
74 // OpenOffice uses meta:keywords for keywords - dc:subject
75 // comes from a text field labelled "Subject". Let's just treat
76 // it as more keywords.
78 } else if (tag
== "dc:title") {
79 // OpenDocument, MSXML.
81 } else if (tag
== "dc:description") {
82 // OpenDocument, MSXML.
84 } else if (tag
== "dc:creator") {
85 // OpenDocument, MSXML.
87 } else if (tag
== "dcterms:created") {
// Tags starting with 'm': only the three "meta:" names below are recognised.
91 } else if (tag
[0] == 'm') {
92 if (tag
== "meta:keyword") {
97 // <meta:keyword>information retrieval</meta:keyword>
100 } else if (tag
== "meta:creation-date") {
103 } else if (tag
== "meta:document-statistic") {
106 // The values we want for the page count are to be found as
107 // attributes of the meta:document-statistic tag (which occurs
108 // inside <office:meta> but we don't bother to check that).
110 // For text documents, we want the meta:page-count attribute.
112 // For spreadsheets, meta:table-count seems to give the sheet count
113 // (text documents also have meta:table-count so we check for this
114 // after meta:page-count).
// || short-circuits, so meta:table-count is only looked up when the
// meta:page-count lookup yields false.
116 if (get_attribute("meta:page-count", value
) ||
117 get_attribute("meta:table-count", value
)) {
// `pages` is only assigned when parse_unsigned() yields true (presumably
// meaning the attribute text parsed cleanly - its definition is not visible
// here); the unsigned result is then converted to int.
119 if (parse_unsigned(value
.c_str(), u_pages
))
120 pages
= int(u_pages
);
// Tags starting with "cp": only cp:keywords is recognised.
123 } else if (tag
[0] == 'c' && tag
[1] == 'p') {
124 if (tag
== "cp:keywords") {
133 OpenDocMetaParser::closing_tag(const string
&)