scriptindex: Fix weird error cases
[xapian.git] / xapian-applications / omega / metaxmlparse.cc
blobcf09145859c030cf7e7f2fb8109b9d8d1afb0d5f
1 /** @file
2 * @brief Parser for OpenDocument's meta.xml.
3 */
4 /* Copyright (C) 2006,2009,2010,2011,2013,2015,2020 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "metaxmlparse.h"
25 #include "datetime.h"
27 using namespace std;
29 void
30 MetaXmlParser::process_text(const string &text)
32 switch (field) {
33 case KEYWORDS:
34 if (!keywords.empty()) keywords += ' ';
35 keywords += text;
36 break;
37 case TITLE:
38 if (!title.empty()) title += ' ';
39 title += text;
40 break;
41 case SAMPLE:
42 if (!sample.empty()) sample += ' ';
43 sample += text;
44 break;
45 case AUTHOR:
46 if (!author.empty()) author += ' ';
47 author += text;
48 break;
49 case TOPIC:
50 if (!topic.empty()) topic += ' ';
51 topic += text;
52 break;
53 case CREATED: {
54 // E.g. 2013-03-04T22:57:00
55 created = parse_datetime(text);
56 break;
58 case NONE:
59 // Ignore other fields.
60 break;
64 bool
65 MetaXmlParser::opening_tag(const string &tag)
67 if (tag.size() < 8) return true;
68 if (tag[0] == 'd' && tag[1] == 'c') {
69 if (tag == "dc:subject") {
70 // dc:subject is "Subject and Keywords":
71 // "Typically, Subject will be expressed as keywords, key phrases
72 // or classification codes that describe a topic of the resource."
73 // OpenOffice uses meta:keywords for keywords - dc:subject
74 // comes from a text field labelled "Subject". Let's just treat
75 // it as more keywords.
76 field = KEYWORDS;
77 } else if (tag == "dc:title") {
78 field = TITLE;
79 } else if (tag == "dc:description") {
80 field = SAMPLE;
81 } else if (tag == "dc:creator") {
82 field = AUTHOR;
83 } else if (tag == "dc:subject") {
84 field = TOPIC;
86 } else if (tag[0] == 'm') {
87 if (tag == "meta:keyword") {
88 // e.g.:
89 // <meta:keywords>
90 // <meta:keyword>information retrieval</meta:keyword>
91 // </meta:keywords>
92 field = KEYWORDS;
93 } else if (tag == "meta:creation-date") {
94 field = CREATED;
97 return true;
100 bool
101 MetaXmlParser::closing_tag(const string &)
103 field = NONE;
104 return true;