2 * @brief Standard ValueRangeProcessor and RangeProcessor subclasses
4 /* Copyright (C) 2007,2008,2009,2010,2012,2016,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <xapian/queryparser.h>
26 #include <cstdlib> // For atoi().
29 #include "stringutils.h"
36 StringValueRangeProcessor::operator()(string
&begin
, string
&end
)
40 // If there's a prefix, require it on the start of the range.
41 if (!startswith(begin
, str
)) {
43 return Xapian::BAD_VALUENO
;
45 begin
.erase(0, str
.size());
46 // But it's optional on the end of the range, e.g. $10..50
47 if (startswith(end
, str
)) {
48 end
.erase(0, str
.size());
51 // If there's a suffix, require it on the end of the range.
52 if (!endswith(end
, str
)) {
54 return Xapian::BAD_VALUENO
;
56 end
.resize(end
.size() - str
.size());
57 // But it's optional on the start of the range, e.g. 10..50kg
58 if (endswith(begin
, str
)) {
59 begin
.resize(begin
.size() - str
.size());
/** Split a string of the form X<sep>X<sep>Y into three integers.
 *
 *  Each X is one or two digits, Y is at most four digits, and each
 *  separator is one of '/', '-' or '.'.  The two X fields must both lie in
 *  the range 1..31; no check is applied to Y.
 *
 *  An empty string is accepted: all three outputs are set to -1.
 *
 *  @param s   Text to decode.
 *  @param x1  Receives the first field.
 *  @param x2  Receives the second field.
 *  @param y   Receives the third field.
 *
 *  @return true if @a s was empty or matched the pattern.  On a false
 *          return the outputs may have been partially written.
 */
static bool
decode_xxy(const std::string & s, int & x1, int &x2, int &y)
{
    if (s.size() == 0) {
        x1 = x2 = y = -1;
        return true;
    }
    // Shortest form is "d/m/y" (5 chars), longest is "dd/mm/yyyy" (10).
    if (s.size() < 5 || s.size() > 10) return false;

    // First field: 1 or 2 digits followed by a separator.  If there is no
    // non-digit at all, i is npos and fails the "> 2" test.
    size_t i = s.find_first_not_of("0123456789");
    if (i < 1 || i > 2 || !(s[i] == '/' || s[i] == '-' || s[i] == '.'))
        return false;

    // Second field: 1 or 2 digits followed by a separator.  The arithmetic
    // is unsigned, so j == npos yields a huge width and is rejected too.
    size_t j = s.find_first_not_of("0123456789", i + 1);
    if (j - (i + 1) < 1 || j - (i + 1) > 2 ||
        !(s[j] == '/' || s[j] == '-' || s[j] == '.'))
        return false;

    // Third field: at most 4 characters after the separator, all digits.
    if (s.size() - j > 4 + 1) return false;
    if (s.find_first_not_of("0123456789", j + 1) != std::string::npos)
        return false;

    x1 = atoi(s.c_str());
    if (x1 < 1 || x1 > 31) return false;
    x2 = atoi(s.c_str() + i + 1);
    if (x2 < 1 || x2 > 31) return false;
    y = atoi(s.c_str() + j + 1);
    return true;
}
// We just use this to decide if an ambiguous aa/bb/cc date could be a
// particular format, so there's no need to be strict about the exact number
// of days in February.  The most useful check is that the month field is
// <= 12 so we could just check the day is <= 31 really.
static const char max_month_length[12] = {
    31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};

/** Check whether a day/month pair is plausible.
 *
 *  @param d  Day of the month.
 *  @param m  Month number (1..12), or -1 as a sentinel which is always
 *            accepted without looking at @a d.
 *
 *  @return true if @a m is -1, or if @a m is in 1..12 and @a d is between 1
 *          and the longest possible length of that month.
 */
static bool
vet_dm(int d, int m)
{
    if (m == -1) return true;
    if (m > 12 || m < 1) return false;
    if (d < 1 || d > max_month_length[m - 1]) return false;
    return true;
}
/** Test whether a 10 character string looks like YYYY-MM-DD.
 *
 *  The separator may be '-', '.' or '/', but the same character must be
 *  used in both positions.  Only the digit/separator layout is checked; the
 *  numeric values are not validated.
 *
 *  NB Assumes the length has been checked to be 10 already.
 *
 *  @param s  Candidate string (length 10).
 *
 *  @return true if @a s has the layout DDDDxDDxDD with a permitted
 *          separator x.
 */
static bool
is_yyyy_mm_dd(const std::string &s)
{
    return (s.find_first_not_of("0123456789") == 4 &&
            s.find_first_not_of("0123456789", 5) == 7 &&
            s.find_first_not_of("0123456789", 8) == std::string::npos &&
            s[4] == s[7] &&
            (s[4] == '-' || s[4] == '.' || s[4] == '/'));
}
/** Write exactly w chars to buffer p representing integer v.
 *
 *  The result is left padded with zeros if v < pow(10, w - 1).
 *
 *  If v >= pow(10, w), then the output will show v % pow(10, w) (i.e. the
 *  most significant digits are lost).
 *
 *  No terminating NUL is written.
 *
 *  @param p  Destination with room for at least @a w chars.
 *  @param v  Value to format; callers in this file pass non-negative values.
 *  @param w  Number of characters to write.
 */
static void
format_int_fixed_width(char * p, int v, int w)
{
    // Fill from the least significant digit backwards.
    while (--w >= 0) {
        p[w] = static_cast<char>('0' + (v % 10));
        v /= 10;
    }
}

/** Write a date as the 8 characters YYYYMMDD (no terminating NUL).
 *
 *  @param p  Destination with room for at least 8 chars.
 *  @param y  Year, written as 4 digits.
 *  @param m  Month, written as 2 digits.
 *  @param d  Day, written as 2 digits.
 */
static void
format_yyyymmdd(char * p, int y, int m, int d)
{
    format_int_fixed_width(p, y, 4);
    format_int_fixed_width(p + 4, m, 2);
    format_int_fixed_width(p + 6, d, 2);
}
144 DateValueRangeProcessor::operator()(string
&begin
, string
&end
)
146 if (StringValueRangeProcessor::operator()(begin
, end
) == BAD_VALUENO
)
149 if ((begin
.size() == 8 || begin
.size() == 0) &&
150 (end
.size() == 8 || end
.size() == 0) &&
151 begin
.find_first_not_of("0123456789") == string::npos
&&
152 end
.find_first_not_of("0123456789") == string::npos
) {
156 if ((begin
.size() == 10 || begin
.size() == 0) &&
157 (end
.size() == 10 || end
.size() == 0)) {
158 if ((begin
.empty() || is_yyyy_mm_dd(begin
)) &&
159 (end
.empty() || is_yyyy_mm_dd(end
))) {
161 if (!begin
.empty()) {
175 if (!decode_xxy(begin
, b_d
, b_m
, b_y
) || !decode_xxy(end
, e_d
, e_m
, e_y
))
176 return Xapian::BAD_VALUENO
;
178 // Check that the month and day are within range. Also assume "start" <=
179 // "end" to help decide ambiguous cases.
180 if (!prefer_mdy
&& vet_dm(b_d
, b_m
) && vet_dm(e_d
, e_m
) &&
181 (b_y
!= e_y
|| b_m
< e_m
|| (b_m
== e_m
&& b_d
<= e_d
))) {
183 } else if (vet_dm(b_m
, b_d
) && vet_dm(e_m
, e_d
) &&
184 (b_y
!= e_y
|| b_d
< e_d
|| (b_d
== e_d
&& b_m
<= e_m
))) {
187 } else if (prefer_mdy
&& vet_dm(b_d
, b_m
) && vet_dm(e_d
, e_m
) &&
188 (b_y
!= e_y
|| b_m
< e_m
|| (b_m
== e_m
&& b_d
<= e_d
))) {
191 return Xapian::BAD_VALUENO
;
195 if (!begin
.empty()) {
198 if (b_y
< epoch_year
) b_y
+= 100;
200 format_yyyymmdd(buf
, b_y
, b_m
, b_d
);
201 begin
.assign(buf
, 8);
206 if (e_y
< epoch_year
) e_y
+= 100;
208 format_yyyymmdd(buf
, e_y
, e_m
, e_d
);
215 NumberValueRangeProcessor::operator()(string
&begin
, string
&end
)
217 if (StringValueRangeProcessor::operator()(begin
, end
) == BAD_VALUENO
)
220 // Parse the numbers to floating point.
223 if (!begin
.empty()) {
225 const char * startptr
= begin
.c_str();
227 beginnum
= strtod(startptr
, &endptr
);
228 if (endptr
!= startptr
+ begin
.size())
229 // Invalid characters in string
230 return Xapian::BAD_VALUENO
;
232 // Overflow or underflow
233 return Xapian::BAD_VALUENO
;
235 // Silence GCC warning.
241 const char * startptr
= end
.c_str();
243 double endnum
= strtod(startptr
, &endptr
);
244 if (endptr
!= startptr
+ end
.size())
245 // Invalid characters in string
246 return Xapian::BAD_VALUENO
;
248 // Overflow or underflow
249 return Xapian::BAD_VALUENO
;
250 end
.assign(Xapian::sortable_serialise(endnum
));
253 if (!begin
.empty()) {
254 begin
.assign(Xapian::sortable_serialise(beginnum
));
261 RangeProcessor::check_range(const string
& b
, const string
& e
)
264 return operator()(b
, e
);
266 size_t off_b
= 0, len_b
= string::npos
;
267 size_t off_e
= 0, len_e
= string::npos
;
269 bool prefix
= !(flags
& Xapian::RP_SUFFIX
);
270 bool repeated
= (flags
& Xapian::RP_REPEATED
);
273 // If there's a prefix, require it on the start of the range.
274 if (!startswith(b
, str
)) {
279 // Optionally allow it on the end of the range, e.g. $10..50
280 if (repeated
&& startswith(e
, str
)) {
284 // If there's a suffix, require it on the end of the range.
285 if (!endswith(e
, str
)) {
289 len_e
= e
.size() - str
.size();
290 // Optionally allow it on the start of the range, e.g. 10..50kg
291 if (repeated
&& endswith(b
, str
)) {
292 len_b
= b
.size() - str
.size();
296 return operator()(string(b
, off_b
, len_b
), string(e
, off_e
, len_e
));
299 return Xapian::Query(Xapian::Query::OP_INVALID
);
303 RangeProcessor::operator()(const string
& b
, const string
& e
)
306 return Xapian::Query(Xapian::Query::OP_VALUE_GE
, slot
, b
);
307 return Xapian::Query(Xapian::Query::OP_VALUE_RANGE
, slot
, b
, e
);
311 DateRangeProcessor::operator()(const string
& b
, const string
& e
)
313 if ((b
.size() == 8 || b
.size() == 0) &&
314 (e
.size() == 8 || e
.size() == 0) &&
315 b
.find_first_not_of("0123456789") == string::npos
&&
316 e
.find_first_not_of("0123456789") == string::npos
) {
318 return RangeProcessor::operator()(b
, e
);
320 if ((b
.size() == 10 || b
.size() == 0) &&
321 (e
.size() == 10 || e
.size() == 0)) {
322 if ((b
.empty() || is_yyyy_mm_dd(b
)) &&
323 (e
.empty() || is_yyyy_mm_dd(e
))) {
324 string begin
= b
, end
= e
;
326 if (!begin
.empty()) {
334 return RangeProcessor::operator()(begin
, end
);
338 bool prefer_mdy
= (flags
& Xapian::RP_DATE_PREFER_MDY
);
341 if (!decode_xxy(b
, b_d
, b_m
, b_y
) || !decode_xxy(e
, e_d
, e_m
, e_y
))
344 // Check that the month and day are within range. Also assume "start" <=
345 // "e" to help decide ambiguous cases.
346 if (!prefer_mdy
&& vet_dm(b_d
, b_m
) && vet_dm(e_d
, e_m
) &&
347 (b_y
!= e_y
|| b_m
< e_m
|| (b_m
== e_m
&& b_d
<= e_d
))) {
349 } else if (vet_dm(b_m
, b_d
) && vet_dm(e_m
, e_d
) &&
350 (b_y
!= e_y
|| b_d
< e_d
|| (b_d
== e_d
&& b_m
<= e_m
))) {
353 } else if (prefer_mdy
&& vet_dm(b_d
, b_m
) && vet_dm(e_d
, e_m
) &&
354 (b_y
!= e_y
|| b_m
< e_m
|| (b_m
== e_m
&& b_d
<= e_d
))) {
361 char buf_b
[8], buf_e
[8];
362 size_t len_b
= 0, len_e
= 0;
366 if (b_y
< epoch_year
) b_y
+= 100;
368 format_yyyymmdd(buf_b
, b_y
, b_m
, b_d
);
374 if (e_y
< epoch_year
) e_y
+= 100;
376 format_yyyymmdd(buf_e
, e_y
, e_m
, e_d
);
379 return RangeProcessor::operator()(string(buf_b
, len_b
),
380 string(buf_e
, len_e
));
384 return Xapian::Query(Xapian::Query::OP_INVALID
);
388 NumberRangeProcessor::operator()(const string
& b
, const string
& e
)
390 // Parse the numbers to floating point.
395 const char * startptr
= b
.c_str();
397 num_b
= strtod(startptr
, &endptr
);
398 if (endptr
!= startptr
+ b
.size() || errno
) {
399 // Invalid characters in string || overflow or underflow.
403 // Silence GCC warning.
409 const char * startptr
= e
.c_str();
411 num_e
= strtod(startptr
, &endptr
);
412 if (endptr
!= startptr
+ e
.size() || errno
) {
413 // Invalid characters in string || overflow or underflow.
417 // Silence GCC warning.
421 return RangeProcessor::operator()(
422 b
.empty() ? b
: Xapian::sortable_serialise(num_b
),
423 e
.empty() ? e
: Xapian::sortable_serialise(num_e
));
426 return Xapian::Query(Xapian::Query::OP_INVALID
);