Revert "Fix integer type used by ESet"
[xapian.git] / xapian-core / api / valuerangeproc.cc
blob86e32aa17422e1d23351ed06f50fd3ee3d4bd6f4
1 /** @file
2 * @brief Standard ValueRangeProcessor and RangeProcessor subclasses
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2012,2016,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include <xapian/queryparser.h>
25 #include <cerrno>
26 #include <cstdlib> // For atoi().
28 #include <string>
29 #include "stringutils.h"
31 using namespace std;
33 namespace Xapian {
35 Xapian::valueno
36 StringValueRangeProcessor::operator()(string &begin, string &end)
38 if (str.size()) {
39 if (prefix) {
40 // If there's a prefix, require it on the start of the range.
41 if (!startswith(begin, str)) {
42 // Prefix not given.
43 return Xapian::BAD_VALUENO;
45 begin.erase(0, str.size());
46 // But it's optional on the end of the range, e.g. $10..50
47 if (startswith(end, str)) {
48 end.erase(0, str.size());
50 } else {
51 // If there's a suffix, require it on the end of the range.
52 if (!endswith(end, str)) {
53 // Suffix not given.
54 return Xapian::BAD_VALUENO;
56 end.resize(end.size() - str.size());
57 // But it's optional on the start of the range, e.g. 10..50kg
58 if (endswith(begin, str)) {
59 begin.resize(begin.size() - str.size());
63 return valno;
/** Decode a date written as "X1<sep>X2<sep>Y".
 *
 *  Each separator may independently be '/', '-' or '.'.  X1 and X2 are one
 *  or two digits and must each lie in the range 1..31; Y is one to four
 *  digits.  Deciding which of X1 and X2 is the day and which is the month
 *  is left to the caller.
 *
 *  @param s   The string to decode.
 *  @param x1  Receives the first numeric field.
 *  @param x2  Receives the second numeric field.
 *  @param y   Receives the year field.
 *
 *  @return true if @a s is empty (x1, x2 and y are then all set to -1) or
 *          was decoded successfully; false otherwise, in which case the
 *          output parameters may have been partially updated.
 */
static bool
decode_xxy(const string & s, int & x1, int &x2, int &y)
{
    if (s.empty()) {
        // An empty string stands for an open end of the range.
        x1 = x2 = y = -1;
        return true;
    }
    if (s.size() < 5 || s.size() > 10) return false;

    // First field: 1 or 2 digits followed by a separator.  If there's no
    // non-digit at all, i is string::npos and the "i > 2" test rejects it
    // before s[i] is looked at.
    size_t i = s.find_first_not_of("0123456789");
    if (i < 1 || i > 2 || !(s[i] == '/' || s[i] == '-' || s[i] == '.'))
        return false;

    // Second field: 1 or 2 digits followed by a separator.  A missing
    // separator gives j == string::npos, which makes the width test fail.
    size_t j = s.find_first_not_of("0123456789", i + 1);
    if (j - (i + 1) < 1 || j - (i + 1) > 2 ||
        !(s[j] == '/' || s[j] == '-' || s[j] == '.'))
        return false;

    // Year: at least 1 and at most 4 characters after the second separator.
    // Without the lower bound, input such as "01/02/" would be accepted and
    // atoi("") would silently produce year 0.
    const size_t year_len = s.size() - (j + 1);
    if (year_len < 1 || year_len > 4) return false;
    if (s.find_first_not_of("0123456789", j + 1) != string::npos)
        return false;

    x1 = atoi(s.c_str());
    if (x1 < 1 || x1 > 31) return false;
    x2 = atoi(s.c_str() + i + 1);
    if (x2 < 1 || x2 > 31) return false;
    y = atoi(s.c_str() + j + 1);
    return true;
}
// This table is only used to decide whether an ambiguous aa/bb/cc date could
// be in a particular format, so it doesn't need to be exact about the number
// of days in February.  The most useful check is that the month field is
// <= 12, so really it would be enough to check that the day is <= 31.
static const char max_month_length[12] = {
    31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};

/** Check whether (d, m) is plausible as a (day, month) pair.
 *
 *  @param d  Candidate day of the month.
 *  @param m  Candidate month, or -1 (which is always accepted, whatever
 *            @a d is - decode_xxy() reports an empty date this way).
 *
 *  @return true if m is -1, or if m is in 1..12 and d is between 1 and the
 *          table entry for that month; false otherwise.
 */
static bool
vet_dm(int d, int m)
{
    if (m == -1) return true;
    const bool month_ok = (m >= 1 && m <= 12);
    // The table is only indexed once the month is known to be in range.
    return month_ok && d >= 1 && d <= max_month_length[m - 1];
}
// Test whether s looks like YYYY-MM-DD, YYYY.MM.DD or YYYY/MM/DD: digits
// everywhere except offsets 4 and 7, which must hold the same separator.
//
// NB The caller is expected to have checked that s has length 10 already.
static bool
is_yyyy_mm_dd(const string &s)
{
    static const char digits[] = "0123456789";

    // Four leading digits, then the first separator.
    if (s.find_first_not_of(digits) != 4) return false;
    const char sep = s[4];
    if (sep != '-' && sep != '.' && sep != '/') return false;

    // Two more digits, then the same separator again.
    if (s.find_first_not_of(digits, 5) != 7) return false;
    if (s[7] != sep) return false;

    // Only digits may follow.
    return s.find_first_not_of(digits, 8) == string::npos;
}
// Fill p[0] .. p[w - 1] with the decimal digits of integer v (no terminating
// nul is written).
//
// If v has fewer than w digits, the output is padded on the left with zeros.
//
// If v has more than w digits, only the w least significant ones are written
// (i.e. the output shows v % pow(10, w)).
static void
format_int_fixed_width(char * p, int v, int w)
{
    // Work from the last character back, peeling off one digit each time.
    for (int pos = w - 1; pos >= 0; --pos) {
        p[pos] = '0' + (v % 10);
        v /= 10;
    }
}
135 static void
136 format_yyyymmdd(char * p, int y, int m, int d)
138 format_int_fixed_width(p, y, 4);
139 format_int_fixed_width(p + 4, m, 2);
140 format_int_fixed_width(p + 6, d, 2);
143 Xapian::valueno
144 DateValueRangeProcessor::operator()(string &begin, string &end)
146 if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
147 return BAD_VALUENO;
149 if ((begin.size() == 8 || begin.size() == 0) &&
150 (end.size() == 8 || end.size() == 0) &&
151 begin.find_first_not_of("0123456789") == string::npos &&
152 end.find_first_not_of("0123456789") == string::npos) {
153 // YYYYMMDD
154 return valno;
156 if ((begin.size() == 10 || begin.size() == 0) &&
157 (end.size() == 10 || end.size() == 0)) {
158 if ((begin.empty() || is_yyyy_mm_dd(begin)) &&
159 (end.empty() || is_yyyy_mm_dd(end))) {
160 // YYYY-MM-DD
161 if (!begin.empty()) {
162 begin.erase(7, 1);
163 begin.erase(4, 1);
165 if (!end.empty()) {
166 end.erase(7, 1);
167 end.erase(4, 1);
169 return valno;
173 int b_d, b_m, b_y;
174 int e_d, e_m, e_y;
175 if (!decode_xxy(begin, b_d, b_m, b_y) || !decode_xxy(end, e_d, e_m, e_y))
176 return Xapian::BAD_VALUENO;
178 // Check that the month and day are within range. Also assume "start" <=
179 // "end" to help decide ambiguous cases.
180 if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
181 (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
182 // OK.
183 } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) &&
184 (b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) {
185 swap(b_m, b_d);
186 swap(e_m, e_d);
187 } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
188 (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
189 // OK.
190 } else {
191 return Xapian::BAD_VALUENO;
194 char buf[8];
195 if (!begin.empty()) {
196 if (b_y < 100) {
197 b_y += 1900;
198 if (b_y < epoch_year) b_y += 100;
200 format_yyyymmdd(buf, b_y, b_m, b_d);
201 begin.assign(buf, 8);
203 if (!end.empty()) {
204 if (e_y < 100) {
205 e_y += 1900;
206 if (e_y < epoch_year) e_y += 100;
208 format_yyyymmdd(buf, e_y, e_m, e_d);
209 end.assign(buf, 8);
211 return valno;
214 Xapian::valueno
215 NumberValueRangeProcessor::operator()(string &begin, string &end)
217 if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
218 return BAD_VALUENO;
220 // Parse the numbers to floating point.
221 double beginnum;
223 if (!begin.empty()) {
224 errno = 0;
225 const char * startptr = begin.c_str();
226 char * endptr;
227 beginnum = strtod(startptr, &endptr);
228 if (endptr != startptr + begin.size())
229 // Invalid characters in string
230 return Xapian::BAD_VALUENO;
231 if (errno)
232 // Overflow or underflow
233 return Xapian::BAD_VALUENO;
234 } else {
235 // Silence GCC warning.
236 beginnum = 0.0;
239 if (!end.empty()) {
240 errno = 0;
241 const char * startptr = end.c_str();
242 char * endptr;
243 double endnum = strtod(startptr, &endptr);
244 if (endptr != startptr + end.size())
245 // Invalid characters in string
246 return Xapian::BAD_VALUENO;
247 if (errno)
248 // Overflow or underflow
249 return Xapian::BAD_VALUENO;
250 end.assign(Xapian::sortable_serialise(endnum));
253 if (!begin.empty()) {
254 begin.assign(Xapian::sortable_serialise(beginnum));
257 return valno;
260 Xapian::Query
261 RangeProcessor::check_range(const string& b, const string& e)
263 if (str.empty())
264 return operator()(b, e);
266 size_t off_b = 0, len_b = string::npos;
267 size_t off_e = 0, len_e = string::npos;
269 bool prefix = !(flags & Xapian::RP_SUFFIX);
270 bool repeated = (flags & Xapian::RP_REPEATED);
272 if (prefix) {
273 // If there's a prefix, require it on the start of the range.
274 if (!startswith(b, str)) {
275 // Prefix not given.
276 goto not_our_range;
278 off_b = str.size();
279 // Optionally allow it on the end of the range, e.g. $10..50
280 if (repeated && startswith(e, str)) {
281 off_e = off_b;
283 } else {
284 // If there's a suffix, require it on the end of the range.
285 if (!endswith(e, str)) {
286 // Suffix not given.
287 goto not_our_range;
289 len_e = e.size() - str.size();
290 // Optionally allow it on the start of the range, e.g. 10..50kg
291 if (repeated && endswith(b, str)) {
292 len_b = b.size() - str.size();
296 return operator()(string(b, off_b, len_b), string(e, off_e, len_e));
298 not_our_range:
299 return Xapian::Query(Xapian::Query::OP_INVALID);
302 Xapian::Query
303 RangeProcessor::operator()(const string& b, const string& e)
305 if (e.empty())
306 return Xapian::Query(Xapian::Query::OP_VALUE_GE, slot, b);
307 return Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, b, e);
310 Xapian::Query
311 DateRangeProcessor::operator()(const string& b, const string& e)
313 if ((b.size() == 8 || b.size() == 0) &&
314 (e.size() == 8 || e.size() == 0) &&
315 b.find_first_not_of("0123456789") == string::npos &&
316 e.find_first_not_of("0123456789") == string::npos) {
317 // YYYYMMDD
318 return RangeProcessor::operator()(b, e);
320 if ((b.size() == 10 || b.size() == 0) &&
321 (e.size() == 10 || e.size() == 0)) {
322 if ((b.empty() || is_yyyy_mm_dd(b)) &&
323 (e.empty() || is_yyyy_mm_dd(e))) {
324 string begin = b, end = e;
325 // YYYY-MM-DD
326 if (!begin.empty()) {
327 begin.erase(7, 1);
328 begin.erase(4, 1);
330 if (!end.empty()) {
331 end.erase(7, 1);
332 end.erase(4, 1);
334 return RangeProcessor::operator()(begin, end);
338 bool prefer_mdy = (flags & Xapian::RP_DATE_PREFER_MDY);
339 int b_d, b_m, b_y;
340 int e_d, e_m, e_y;
341 if (!decode_xxy(b, b_d, b_m, b_y) || !decode_xxy(e, e_d, e_m, e_y))
342 goto not_our_range;
344 // Check that the month and day are within range. Also assume "start" <=
345 // "e" to help decide ambiguous cases.
346 if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
347 (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
348 // OK.
349 } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) &&
350 (b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) {
351 swap(b_m, b_d);
352 swap(e_m, e_d);
353 } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
354 (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
355 // OK.
356 } else {
357 goto not_our_range;
361 char buf_b[8], buf_e[8];
362 size_t len_b = 0, len_e = 0;
363 if (!b.empty()) {
364 if (b_y < 100) {
365 b_y += 1900;
366 if (b_y < epoch_year) b_y += 100;
368 format_yyyymmdd(buf_b, b_y, b_m, b_d);
369 len_b = 8;
371 if (!e.empty()) {
372 if (e_y < 100) {
373 e_y += 1900;
374 if (e_y < epoch_year) e_y += 100;
376 format_yyyymmdd(buf_e, e_y, e_m, e_d);
377 len_e = 8;
379 return RangeProcessor::operator()(string(buf_b, len_b),
380 string(buf_e, len_e));
383 not_our_range:
384 return Xapian::Query(Xapian::Query::OP_INVALID);
387 Xapian::Query
388 NumberRangeProcessor::operator()(const string& b, const string& e)
390 // Parse the numbers to floating point.
391 double num_b, num_e;
393 if (!b.empty()) {
394 errno = 0;
395 const char * startptr = b.c_str();
396 char * endptr;
397 num_b = strtod(startptr, &endptr);
398 if (endptr != startptr + b.size() || errno) {
399 // Invalid characters in string || overflow or underflow.
400 goto not_our_range;
402 } else {
403 // Silence GCC warning.
404 num_b = 0.0;
407 if (!e.empty()) {
408 errno = 0;
409 const char * startptr = e.c_str();
410 char * endptr;
411 num_e = strtod(startptr, &endptr);
412 if (endptr != startptr + e.size() || errno) {
413 // Invalid characters in string || overflow or underflow.
414 goto not_our_range;
416 } else {
417 // Silence GCC warning.
418 num_e = 0.0;
421 return RangeProcessor::operator()(
422 b.empty() ? b : Xapian::sortable_serialise(num_b),
423 e.empty() ? e : Xapian::sortable_serialise(num_e));
425 not_our_range:
426 return Xapian::Query(Xapian::Query::OP_INVALID);