Fix tg_termpos1 for 64-bit termpos
[xapian.git] / xapian-applications / omega / fields.h
blob96e4c7e8b52a673d782cbcbe0f89dd8c7c31a038
1 /** @file
2 * @brief Field parsing for Omega.
3 */
/* Copyright 2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
22 #ifndef OMEGA_INCLUDED_FIELDS_H
23 #define OMEGA_INCLUDED_FIELDS_H
25 #include <unordered_map>
26 #include <string>
27 #include <utility>
/** Holds a set of named fields parsed from a block of text.
 *
 *  Field names map to string values.  Looking up a name which isn't present
 *  yields an empty string.
 */
class Fields {
    std::unordered_map<std::string, std::string> fields;

  public:
    Fields() {}

    /** Parse fields.
     *
     *  Any fields from a previous call are discarded first.
     *
     *  @param data	The data to parse.
     *
     *  @param names	If non-NULL and not pointing to an empty string, then
     *			specifies the field names for corresponding lines in
     *			@a data.  Otherwise each line of @a data is expected to
     *			be in the format NAME=VALUE; lines without an "=" are
     *			ignored, and values for a repeated NAME are joined with
     *			tabs.
     */
    void parse_fields(const std::string& data, const std::string* names);

    /** Lookup field @a name.
     *
     *  @return The field's value, or a reference to an empty string if there
     *		is no field called @a name.
     */
    const std::string& get_field(const std::string& name) const {
        auto it = fields.find(name);
        if (it != fields.end()) {
            return it->second;
        }
        const static std::string empty_string;
        return empty_string;
    }
};

inline void
Fields::parse_fields(const std::string& data,
                     const std::string* names)
{
    fields.clear();

    if (names && !names->empty()) {
        // Each line in data is a field, with field names taken from
        // corresponding entries in the tab-separated list specified by names.
        // Pairing stops as soon as either the names or the lines run out.
        std::string::size_type v = 0;
        std::string::size_type n = 0;
        do {
            std::string::size_type n_start = n;
            n = names->find('\t', n);
            std::string::size_type v_start = v;
            v = data.find('\n', v);
            // If the name is already present, emplace() keeps the first value.
            fields.emplace(names->substr(n_start, n - n_start),
                           data.substr(v_start, v - v_start));
            // If n or v is std::string::npos then incrementing wraps to 0.
        } while (++n && ++v);
        return;
    }

    // Each line specifies a field in the format NAME=VALUE, where NAME doesn't
    // contain "=" but VALUE may.
    //
    // Indexing data at data.size() is relied on below: for a const
    // std::string that yields '\0', which is how end-of-data is detected.
    std::string::size_type i = 0;
    do {
        std::string::size_type n_start = i;
        char ch;
        while ((ch = data[i]) != '=') {
            // Fast test for '\n' or '\0', with false positives for '\x02' and
            // '\x08' (the latter two are probably unlikely in this context).
            if (rare((ch &~ '\n') == 0)) {
                // Lines without an '=' should be rare.
                if (ch == '\n') {
                    // No "=" in this line - such lines are ignored.
                    //
                    // The name of the next field must start after this
                    // newline, so move n_start along with i; otherwise the
                    // ignored line would end up prefixed to that name.
                    n_start = ++i;
                    continue;
                }
                if (i == data.size()) {
                    // Reached the end of the data without finding an "=".
                    return;
                }
            }
            ++i;
        }

        std::string::size_type eq = i;

        // Scan ahead to the end of the line.
        while ((ch = data[++i]) != '\n') {
            if (ch == '\0' && i == data.size()) {
                // Last line has no trailing newline.  Using npos makes the
                // value run to the end of data and terminates the outer loop.
                i = std::string::npos;
                break;
            }
        }

        std::string::size_type v = eq + 1;
        std::string name(data, n_start, eq - n_start);
        auto r = fields.emplace(name, std::string());
        if (r.second) {
            // New entry - fill in.
            r.first->second.assign(data, v, i - v);
        } else {
            // Existing entry, so accumulate values as tab-separated list.
            std::string& value = r.first->second;
            value += '\t';
            value.append(data, v, i - v);
        }
        // If i is std::string::npos then incrementing wraps to 0.
    } while (++i);
}
128 #endif // OMEGA_INCLUDED_FIELDS_H