/** @file
 * @brief Extract fields from XLSX sheet*.xml.
 */
/* Copyright (C) 2012,2013,2021 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
23 #include "xlsxparser.h"
32 XlsxParser::opening_tag(const string
&tag
)
35 // We need to distinguish <v> tags which are inside <c t="s">, as these
36 // are numeric references to shared strings.
38 if (get_attribute("t", type
) && type
== "s") {
41 mode
= MODE_C_LITERAL
;
42 if (get_attribute("s", type
)) {
43 unsigned long style_id
= strtoul(type
.c_str(), NULL
, 10);
44 if (date_style
.find(style_id
) != date_style
.end()) {
49 } else if (tag
== "v") {
50 if (mode
== MODE_C_LITERAL
) {
51 mode
= MODE_V_LITERAL
;
52 } else if (mode
== MODE_C_STRING
) {
54 } else if (mode
== MODE_C_DATE
) {
57 } else if (tag
== "si") {
59 } else if (tag
== "sst") {
61 if (get_attribute("uniqueCount", unique_count
)) {
62 unsigned long c
= strtoul(unique_count
.c_str(), NULL
, 10);
63 // This reserving is just a performance tweak, so don't go
64 // reserving ludicrous amounts of space just because an XML
65 // attribute told us to.
66 sst
.reserve(std::min(c
, 1000000ul));
68 } else if (tag
== "workbookPr") {
70 if (get_attribute("date1904", v
)) {
71 date1904
= (v
== "true" || v
== "1");
73 } else if (tag
== "numFmt") {
75 if (get_attribute("formatCode", formatcode
)) {
76 // Heuristic for "date format" (FIXME: implement properly)
77 if (strchr(formatcode
.c_str(), 'd') &&
78 strchr(formatcode
.c_str(), 'm') &&
79 strchr(formatcode
.c_str(), 'y')) {
81 if (get_attribute("numFmtId", v
)) {
82 unsigned long id
= strtoul(v
.c_str(), NULL
, 10);
83 date_format
.insert(id
);
87 } else if (tag
== "cellXfs") {
89 } else if (tag
== "xf") {
90 if (mode
== MODE_CELLXFS
) {
92 if (get_attribute("numFmtId", v
)) {
93 unsigned long id
= strtoul(v
.c_str(), NULL
, 10);
94 if ((id
>= 14 && id
<= 17) ||
95 date_format
.find(id
) != date_format
.end()) {
96 date_style
.insert(style_index
);
106 XlsxParser::process_content(const string
& content
)
111 unsigned long c
= strtoul(content
.c_str(), NULL
, 10);
115 // The spec insists we treat 1900 as a leap year!
119 time_t t
= c
* 86400 + 43200;
120 struct tm
* tm
= gmtime(&t
);
123 size_t res
= strftime(buf
, sizeof(buf
), "%Y-%m-%d", tm
);
125 append_field(string(buf
, res
));
130 case MODE_V_STRING
: {
131 // Shared string use.
132 unsigned long c
= strtoul(content
.c_str(), NULL
, 10);
133 if (c
< sst
.size()) {
134 append_field(sst
[c
]);
140 // Literal (possibly calculated) field value.
141 append_field(content
);
145 // Shared string definition.
146 sst
.push_back(content
);